
[RFC,bpf-next,v1,2/8] bpf: no_caller_saved_registers attribute for helper calls

Message ID 20240629094733.3863850-3-eddyz87@gmail.com (mailing list archive)
State RFC
Delegated to: BPF
Series: no_caller_saved_registers attribute for helper calls

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-14 pending Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 fail Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1089 this patch: 1089
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 11 maintainers not CCed: haoluo@google.com jolsa@kernel.org llvm@lists.linux.dev ndesaulniers@google.com morbo@google.com song@kernel.org john.fastabend@gmail.com justinstitt@google.com kpsingh@kernel.org nathan@kernel.org sdf@google.com
netdev/build_clang success Errors and warnings before: 1162 this patch: 1162
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7816 this patch: 7816
netdev/checkpatch warning CHECK: spaces preferred around that '/' (ctx:VxV) WARNING: line length of 81 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0

Commit Message

Eduard Zingerman June 29, 2024, 9:47 a.m. UTC
GCC and LLVM define a no_caller_saved_registers function attribute.
This attribute means that a function scratches only some of the
caller-saved registers defined by the ABI.
For BPF the set of such registers can be defined as follows:
- R0 is scratched only if the function is non-void;
- R1-R5 are scratched only if the corresponding parameter type is
  defined in the function prototype (see the sketch below).
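
For example, a helper declared in BPF program source with the following
(hypothetical) prototype would scratch only r0 and r1 under these rules:

    /* bpf_example_helper() is hypothetical, shown only to illustrate
     * the rules above: r0 is scratched (non-void return), r1 is
     * scratched (one argument), r2-r5 are preserved.
     */
    __attribute__((no_caller_saved_registers))
    long bpf_example_helper(void *ptr);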

This commit introduces the flag bpf_func_proto->nocsr.
If this flag is set for some helper function, the verifier assumes
that it follows the no_caller_saved_registers calling convention.
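
As a minimal sketch, a helper prototype might opt in like this (the
field name comes from this patch; the choice of helper is only an
assumption about later patches in the series):

    const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
            .func     = bpf_get_smp_processor_id,
            .gpl_only = false,
            .ret_type = RET_INTEGER,
            .nocsr    = true, /* assumption: marked nocsr by a later patch */
    };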

The contract between the kernel and clang allows such functions to be
used while maintaining backwards compatibility with old kernels that
do not understand no_caller_saved_registers calls (nocsr for short):

- clang generates a simple pattern for nocsr calls, e.g.:

    r1 = 1;
    r2 = 2;
    *(u64 *)(r10 - 8)  = r1;
    *(u64 *)(r10 - 16) = r2;
    call %[to_be_inlined_by_jit]
    r2 = *(u64 *)(r10 - 16);
    r1 = *(u64 *)(r10 - 8);
    r0 = r1;
    r0 += r2;
    exit;

- the kernel removes the unnecessary spills and fills if the called
  function is inlined by the current JIT (assuming that the patch
  inserted by the JIT honors the nocsr contract, i.e. does not scratch
  r3-r5 for the example above); the code above would then be
  transformed to:

    r1 = 1;
    r2 = 2;
    call %[to_be_inlined_by_jit]
    r0 = r1;
    r0 += r2;
    exit;

Technically, the transformation is split into the following phases:
- during check_cfg() function update_nocsr_pattern_marks() is used to
  find potential patterns;
- upon stack read or write access,
  function check_nocsr_stack_contract() is used to verify if
  stack offsets, presumably reserved for nocsr patterns, are used
  only from those patterns;
- function remove_nocsr_spills_fills(), called from bpf_check(),
  applies the rewrite for valid patterns.

See comment in match_and_mark_nocsr_pattern() for more details.

Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 include/linux/bpf.h          |   6 +
 include/linux/bpf_verifier.h |   9 ++
 kernel/bpf/verifier.c        | 300 ++++++++++++++++++++++++++++++++++-
 3 files changed, 307 insertions(+), 8 deletions(-)

Comments

Eduard Zingerman July 1, 2024, 7:01 p.m. UTC | #1
On Sat, 2024-06-29 at 02:47 -0700, Eduard Zingerman wrote:

[...]

> Technically, the transformation is split into the following phases:
> - during check_cfg() function update_nocsr_pattern_marks() is used to
>   find potential patterns;
> - upon stack read or write access,
>   function check_nocsr_stack_contract() is used to verify if
>   stack offsets, presumably reserved for nocsr patterns, are used
>   only from those patterns;
> - function remove_nocsr_spills_fills(), called from bpf_check(),
>   applies the rewrite for valid patterns.

Talked to Andrii today, he asked to make the following changes:
- move update_nocsr_pattern_marks() from check_cfg() to a separate pass;
- make remove_nocsr_spills_fills() a part of do_misc_fixups()

I'll wait for some comments before submitting v2.

[...]
Andrii Nakryiko July 2, 2024, 12:41 a.m. UTC | #2
On Sat, Jun 29, 2024 at 2:48 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> GCC and LLVM define a no_caller_saved_registers function attribute.
> This attribute means that function scratches only some of
> the caller saved registers defined by ABI.
> For BPF the set of such registers could be defined as follows:
> - R0 is scratched only if function is non-void;
> - R1-R5 are scratched only if corresponding parameter type is defined
>   in the function prototype.
>
> This commit introduces flag bpf_func_prot->nocsr.
> If this flag is set for some helper function, verifier assumes that
> it follows no_caller_saved_registers calling convention.
>
> The contract between kernel and clang allows to simultaneously use
> such functions and maintain backwards compatibility with old
> kernels that don't understand no_caller_saved_registers calls
> (nocsr for short):
>
> - clang generates a simple pattern for nocsr calls, e.g.:
>
>     r1 = 1;
>     r2 = 2;
>     *(u64 *)(r10 - 8)  = r1;
>     *(u64 *)(r10 - 16) = r2;
>     call %[to_be_inlined_by_jit]

"inline_by_jit" is misleading, it can be inlined by BPF verifier using
BPF instructions, not just by BPF JIT

>     r2 = *(u64 *)(r10 - 16);
>     r1 = *(u64 *)(r10 - 8);
>     r0 = r1;
>     r0 += r2;
>     exit;
>
> - kernel removes unnecessary spills and fills, if called function is
>   inlined by current JIT (with assumption that patch inserted by JIT
>   honors nocsr contract, e.g. does not scratch r3-r5 for the example
>   above), e.g. the code above would be transformed to:
>
>     r1 = 1;
>     r2 = 2;
>     call %[to_be_inlined_by_jit]
>     r0 = r1;
>     r0 += r2;
>     exit;
>
> Technically, the transformation is split into the following phases:
> - during check_cfg() function update_nocsr_pattern_marks() is used to
>   find potential patterns;
> - upon stack read or write access,
>   function check_nocsr_stack_contract() is used to verify if
>   stack offsets, presumably reserved for nocsr patterns, are used
>   only from those patterns;
> - function remove_nocsr_spills_fills(), called from bpf_check(),
>   applies the rewrite for valid patterns.
>
> See comment in match_and_mark_nocsr_pattern() for more details.
>
> Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>  include/linux/bpf.h          |   6 +
>  include/linux/bpf_verifier.h |   9 ++
>  kernel/bpf/verifier.c        | 300 ++++++++++++++++++++++++++++++++++-
>  3 files changed, 307 insertions(+), 8 deletions(-)
>

[...]

> -static int find_subprog(struct bpf_verifier_env *env, int off)
> +/* Find subprogram that contains instruction at 'off' */
> +static int find_containing_subprog(struct bpf_verifier_env *env, int off)
>  {
> -       struct bpf_subprog_info *p;
> +       struct bpf_subprog_info *vals = env->subprog_info;
> +       int high = env->subprog_cnt - 1;
> +       int low = 0, ret = -ENOENT;
>
> -       p = bsearch(&off, env->subprog_info, env->subprog_cnt,
> -                   sizeof(env->subprog_info[0]), cmp_subprogs);
> -       if (!p)
> +       if (off >= env->prog->len || off < 0)
>                 return -ENOENT;
> -       return p - env->subprog_info;
>
> +       while (low <= high) {
> +               int mid = (low + high)/2;

styling nit: (...) / 2

> +               struct bpf_subprog_info *val = &vals[mid];
> +               int diff = off - val->start;
> +
> +               if (diff < 0) {

tbh, this hurts my brain. Why not write human-readable and more meaningful

if (off < val->start)

?


> +                       high = mid - 1;
> +               } else {
> +                       low = mid + 1;
> +                       /* remember last time mid.start <= off */
> +                       ret = mid;
> +               }

feel free to ignore, but I find this unnecessary `ret = mid` part a
bit inelegant. See find_linfo in kernel/bpf/log.c for how
lower_bound-like binary search could be implemented without this (I
mean the pattern where invariant keeps low or high as always
satisfying the condition and the other one being adjusted with +1 or
-1, depending on desired logic).


> +       }
> +       return ret;
> +}
> +
> +/* Find subprogram that starts exactly at 'off' */
> +static int find_subprog(struct bpf_verifier_env *env, int off)
> +{
> +       int idx;
> +
> +       idx = find_containing_subprog(env, off);
> +       if (idx < 0 || env->subprog_info[idx].start != off)
> +               return -ENOENT;
> +       return idx;
>  }
>

[...]

> +static u8 get_helper_reg_mask(const struct bpf_func_proto *fn)
> +{
> +       u8 mask;
> +       int i;
> +
> +       if (!fn->nocsr)
> +               return ALL_CALLER_SAVED_REGS;
> +
> +       mask = 0;
> +       mask |= fn->ret_type == RET_VOID ? 0 : BIT(BPF_REG_0);
> +       for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i)
> +               mask |= fn->arg_type[i] == ARG_DONTCARE ? 0 : BIT(BPF_REG_1 + i);

again subjective, but

if (fn->ret_type != RET_VOID)
    mask |= BIT(BPF_REG_0);

(and similarly for ARG_DONTCARE)

seems a bit more readable and not that much more verbose

> +       return mask;
> +}
> +
> +/* True if do_misc_fixups() replaces calls to helper number 'imm',
> + * replacement patch is presumed to follow no_caller_saved_registers contract
> + * (see match_and_mark_nocsr_pattern() below).
> + */
> +static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
> +{

note that there is now also bpf_jit_inlines_helper_call()


> +       return false;
> +}
> +
> +/* If 'insn' is a call that follows no_caller_saved_registers contract
> + * and called function is inlined by current jit, return a mask with
> + * 1s corresponding to registers that are scratched by this call
> + * (depends on return type and number of return parameters).
> + * Otherwise return ALL_CALLER_SAVED_REGS mask.
> + */
> +static u32 call_csr_mask(struct bpf_verifier_env *env, struct bpf_insn *insn)
> +{
> +       const struct bpf_func_proto *fn;
> +
> +       if (bpf_helper_call(insn) &&
> +           verifier_inlines_helper_call(env, insn->imm) &&

strictly speaking, does nocsr have anything to do with inlining,
though? E.g., if we know for sure (however, that's a separate issue)
that helper implementation doesn't touch extra registers, why do we
need inlining to make use of nocsr?

> +           get_helper_proto(env, insn->imm, &fn) == 0 &&
> +           fn->nocsr)
> +               return ~get_helper_reg_mask(fn);
> +
> +       return ALL_CALLER_SAVED_REGS;
> +}
> +

[...]

> + * For example, it is *not* safe to remove spill/fill below:
> + *
> + *   r1 = 1;
> + *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
> + *   call %[to_be_inlined_by_jit]  -->   call %[to_be_inlined_by_jit]
> + *   r1 = *(u64 *)(r10 - 8);             r0 = *(u64 *)(r10 - 8);  <---- wrong !!!
> + *   r0 = *(u64 *)(r10 - 8);             r0 += r1;
> + *   r0 += r1;                           exit;
> + *   exit;
> + */
> +static int match_and_mark_nocsr_pattern(struct bpf_verifier_env *env, int t, bool mark)
> +{
> +       struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
> +       struct bpf_subprog_info *subprog;
> +       u32 csr_mask = call_csr_mask(env, &insns[t]);
> +       u32 reg_mask = ~csr_mask | ~ALL_CALLER_SAVED_REGS;
> +       int s, i;
> +       s16 off;
> +
> +       if (csr_mask == ALL_CALLER_SAVED_REGS)
> +               return false;

false -> 0 ?

> +
> +       for (i = 1, off = 0; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
> +               if (t - i < 0 || t + i >= env->prog->len)
> +                       break;
> +               stx = &insns[t - i];
> +               ldx = &insns[t + i];
> +               if (off == 0) {
> +                       off = stx->off;
> +                       if (off % BPF_REG_SIZE != 0)
> +                               break;

just return here?

> +               }
> +               if (/* *(u64 *)(r10 - off) = r[0-5]? */
> +                   stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
> +                   stx->dst_reg != BPF_REG_10 ||
> +                   /* r[0-5] = *(u64 *)(r10 - off)? */
> +                   ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
> +                   ldx->src_reg != BPF_REG_10 ||
> +                   /* check spill/fill for the same reg and offset */
> +                   stx->src_reg != ldx->dst_reg ||
> +                   stx->off != ldx->off ||
> +                   stx->off != off ||
> +                   /* this should be a previously unseen register */
> +                   BIT(stx->src_reg) & reg_mask)
> +                       break;
> +               reg_mask |= BIT(stx->src_reg);
> +               if (mark) {
> +                       env->insn_aux_data[t - i].nocsr_pattern = true;
> +                       env->insn_aux_data[t + i].nocsr_pattern = true;
> +               }
> +       }
> +       if (i == 1)
> +               return 0;
> +       if (mark) {
> +               s = find_containing_subprog(env, t);
> +               /* can't happen */
> +               if (WARN_ON_ONCE(s < 0))
> +                       return 0;
> +               subprog = &env->subprog_info[s];
> +               subprog->nocsr_stack_off = min(subprog->nocsr_stack_off, off);
> +       }

why not split pattern detection and all this other marking logic? You
can return "the size of csr pattern", meaning how many spills/fills
are there surrounding the call, no? Then all the marking can be done
(if necessary) by the caller.

The question is what to do with a zero pattern (no spills/fills for a
nocsr call; is that a valid case?)

> +       return i - 1;
> +}
> +
> +/* If instruction 't' is a nocsr call surrounded by spill/fill pairs,
> + * update env->subprog_info[_]->nocsr_stack_off and
> + * env->insn_aux_data[_].nocsr_pattern fields.
> + */
> +static void update_nocsr_pattern_marks(struct bpf_verifier_env *env, int t)
> +{
> +       match_and_mark_nocsr_pattern(env, t, true);
> +}
> +
> +/* If instruction 't' is a nocsr call surrounded by spill/fill pairs,
> + * return the number of such pairs.
> + */
> +static int match_nocsr_pattern(struct bpf_verifier_env *env, int t)
> +{
> +       return match_and_mark_nocsr_pattern(env, t, false);
> +}
> +
>  /* Visits the instruction at index t and returns one of the following:
>   *  < 0 - an error occurred
>   *  DONE_EXPLORING - the instruction was fully explored
> @@ -16017,6 +16262,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
>                                 mark_force_checkpoint(env, t);
>                         }
>                 }
> +               if (insn->src_reg == 0)
> +                       update_nocsr_pattern_marks(env, t);

as you mentioned, we discussed moving this from check_cfg() step, as
it doesn't seem to be coupled with "graph" part of the algorithm

>                 return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
>
>         case BPF_JA:
> @@ -19063,15 +19310,16 @@ static int opt_remove_dead_code(struct bpf_verifier_env *env)
>         return 0;
>  }
>
> +static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
> +
>  static int opt_remove_nops(struct bpf_verifier_env *env)
>  {
> -       const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
>         struct bpf_insn *insn = env->prog->insnsi;
>         int insn_cnt = env->prog->len;
>         int i, err;
>
>         for (i = 0; i < insn_cnt; i++) {
> -               if (memcmp(&insn[i], &ja, sizeof(ja)))
> +               if (memcmp(&insn[i], &NOP, sizeof(NOP)))
>                         continue;
>
>                 err = verifier_remove_insns(env, i, 1);
> @@ -20801,6 +21049,39 @@ static int optimize_bpf_loop(struct bpf_verifier_env *env)
>         return 0;
>  }
>
> +/* Remove unnecessary spill/fill pairs, members of nocsr pattern.
> + * Do this as a separate pass to avoid interfering with helper/kfunc
> + * inlining logic in do_misc_fixups().
> + * See comment for match_and_mark_nocsr_pattern().
> + */
> +static int remove_nocsr_spills_fills(struct bpf_verifier_env *env)
> +{
> +       struct bpf_subprog_info *subprogs = env->subprog_info;
> +       int i, j, spills_num, cur_subprog = 0;
> +       struct bpf_insn *insn = env->prog->insnsi;
> +       int insn_cnt = env->prog->len;
> +
> +       for (i = 0; i < insn_cnt; i++, insn++) {
> +               spills_num = match_nocsr_pattern(env, i);

we can probably afford a single-byte field somewhere in
bpf_insn_aux_data to remember "csr pattern size" instead of just a
true/false fact that it is nocsr call. And so we wouldn't need to do
pattern matching again here, we'll just have all the data.


> +               if (spills_num == 0)
> +                       goto next_insn;
> +               for (j = 1; j <= spills_num; ++j)
> +                       if ((insn - j)->off >= subprogs[cur_subprog].nocsr_stack_off ||
> +                           (insn + j)->off >= subprogs[cur_subprog].nocsr_stack_off)
> +                               goto next_insn;
> +               /* NOPs are removed by opt_remove_nops() later */
> +               for (j = 1; j <= spills_num; ++j) {
> +                       *(insn - j) = NOP;
> +                       *(insn + j) = NOP;
> +               }
> +
> +next_insn:
> +               if (subprogs[cur_subprog + 1].start == i + 1)
> +                       cur_subprog++;
> +       }
> +       return 0;
> +}
> +
>  static void free_states(struct bpf_verifier_env *env)
>  {
>         struct bpf_verifier_state_list *sl, *sln;
> @@ -21719,6 +22000,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
>         if (ret == 0)
>                 ret = optimize_bpf_loop(env);
>
> +       if (ret == 0)
> +               ret = remove_nocsr_spills_fills(env);
> +
>         if (is_priv) {
>                 if (ret == 0)
>                         opt_hard_wire_dead_code_branches(env);
> --
> 2.45.2
>
Eduard Zingerman July 2, 2024, 8:38 p.m. UTC | #3
On Mon, 2024-07-01 at 17:41 -0700, Andrii Nakryiko wrote:

[...]

> > - clang generates a simple pattern for nocsr calls, e.g.:
> > 
> >     r1 = 1;
> >     r2 = 2;
> >     *(u64 *)(r10 - 8)  = r1;
> >     *(u64 *)(r10 - 16) = r2;
> >     call %[to_be_inlined_by_jit]
> 
> "inline_by_jit" is misleading, it can be inlined by BPF verifier using
> BPF instructions, not just by BPF JIT

Will change

> >     r2 = *(u64 *)(r10 - 16);
> >     r1 = *(u64 *)(r10 - 8);
> >     r0 = r1;
> >     r0 += r2;
> >     exit;

[...]
> 
> > -static int find_subprog(struct bpf_verifier_env *env, int off)
> > +/* Find subprogram that contains instruction at 'off' */
> > +static int find_containing_subprog(struct bpf_verifier_env *env, int off)
> >  {
> > -       struct bpf_subprog_info *p;
> > +       struct bpf_subprog_info *vals = env->subprog_info;
> > +       int high = env->subprog_cnt - 1;
> > +       int low = 0, ret = -ENOENT;
> > 
> > -       p = bsearch(&off, env->subprog_info, env->subprog_cnt,
> > -                   sizeof(env->subprog_info[0]), cmp_subprogs);
> > -       if (!p)
> > +       if (off >= env->prog->len || off < 0)
> >                 return -ENOENT;
> > -       return p - env->subprog_info;
> > 
> > +       while (low <= high) {
> > +               int mid = (low + high)/2;
> 
> styling nit: (...) / 2
> 
> > +               struct bpf_subprog_info *val = &vals[mid];
> > +               int diff = off - val->start;
> > +
> > +               if (diff < 0) {
> 
> tbh, this hurts my brain. Why not write human-readable and more meaningful
> 
> if (off < val->start)
> 
> ?

No reason, will change.

> > +                       high = mid - 1;
> > +               } else {
> > +                       low = mid + 1;
> > +                       /* remember last time mid.start <= off */
> > +                       ret = mid;
> > +               }
> 
> feel free to ignore, but I find this unnecessary `ret = mid` part a
> bit inelegant. See find_linfo in kernel/bpf/log.c for how
> lower_bound-like binary search could be implemented without this (I
> mean the pattern where invariant keeps low or high as always
> satisfying the condition and the other one being adjusted with +1 or
> -1, depending on desired logic).

Will steal the code from there, thank you.
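
For reference, a lower_bound-style search over subprog_info in the spirit
of find_linfo() might look roughly like this (a sketch, assuming
subprog_info[0].start == 0 as the verifier maintains):

    /* sketch: return index of rightmost subprog with start <= off */
    static int find_containing_subprog(struct bpf_verifier_env *env, int off)
    {
            struct bpf_subprog_info *vals = env->subprog_info;
            int l, r, m;

            if (off < 0 || off >= env->prog->len || env->subprog_cnt == 0)
                    return -ENOENT;

            l = 0;
            r = env->subprog_cnt - 1;
            /* invariant: vals[l].start <= off */
            while (l < r) {
                    m = l + (r - l + 1) / 2;
                    if (vals[m].start <= off)
                            l = m;
                    else
                            r = m - 1;
            }
            return l;
    }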

[...]

> > +static u8 get_helper_reg_mask(const struct bpf_func_proto *fn)
> > +{
> > +       u8 mask;
> > +       int i;
> > +
> > +       if (!fn->nocsr)
> > +               return ALL_CALLER_SAVED_REGS;
> > +
> > +       mask = 0;
> > +       mask |= fn->ret_type == RET_VOID ? 0 : BIT(BPF_REG_0);
> > +       for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i)
> > +               mask |= fn->arg_type[i] == ARG_DONTCARE ? 0 : BIT(BPF_REG_1 + i);
> 
> again subjective, but
> 
> if (fn->ret_type != RET_VOID)
>     mask |= BIT(BPF_REG_0);
> 
> (and similarly for ARG_DONTCARE)
> 
> seems a bit more readable and not that much more verbose

Sure, will change.
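
The adjusted version would then read roughly as follows (a sketch with
the same semantics as the posted get_helper_reg_mask()):

    static u8 get_helper_reg_mask(const struct bpf_func_proto *fn)
    {
            u8 mask = 0;
            int i;

            if (!fn->nocsr)
                    return ALL_CALLER_SAVED_REGS;

            if (fn->ret_type != RET_VOID)
                    mask |= BIT(BPF_REG_0);
            for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i)
                    if (fn->arg_type[i] != ARG_DONTCARE)
                            mask |= BIT(BPF_REG_1 + i);
            return mask;
    }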

[...]

> > +static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
> > +{
> 
> note that there is now also bpf_jit_inlines_helper_call()

There is.
I'd like authors of jit inline patches to explicitly opt in to
nocsr, vouching that the inline patch follows the nocsr contract.
The idea is that people would extend call_csr_mask() as necessary.

> 
> 
> > +       return false;
> > +}
> > +
> > +/* If 'insn' is a call that follows no_caller_saved_registers contract
> > + * and called function is inlined by current jit, return a mask with
> > + * 1s corresponding to registers that are scratched by this call
> > + * (depends on return type and number of return parameters).
> > + * Otherwise return ALL_CALLER_SAVED_REGS mask.
> > + */
> > +static u32 call_csr_mask(struct bpf_verifier_env *env, struct bpf_insn *insn)
> > +{
> > +       const struct bpf_func_proto *fn;
> > +
> > +       if (bpf_helper_call(insn) &&
> > +           verifier_inlines_helper_call(env, insn->imm) &&
> 
> strictly speaking, does nocsr have anything to do with inlining,
> though? E.g., if we know for sure (however, that's a separate issue)
> that helper implementation doesn't touch extra registers, why do we
> need inlining to make use of nocsr?

Technically, the alternative to nocsr is for the C version of the
helper/kfunc itself to carry the no_caller_saved_registers attribute.
Grep shows a single function annotated as such in the kernel tree:
stackleak_track_stack().
Or, maybe, for helpers/kfuncs implemented in assembly.
Whenever such helpers/kfuncs are added, this function could be extended.
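
Purely as a hypothetical sketch, such an extension to call_csr_mask()
could look like the following, where bpf_helper_impl_is_nocsr() does not
exist and only stands for "the helper body itself is known to honor the
contract":

    if (bpf_helper_call(insn) &&
        (verifier_inlines_helper_call(env, insn->imm) ||
         bpf_helper_impl_is_nocsr(insn->imm) /* hypothetical */) &&
        get_helper_proto(env, insn->imm, &fn) == 0 &&
        fn->nocsr)
            return ~get_helper_reg_mask(fn);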

> 
> > +           get_helper_proto(env, insn->imm, &fn) == 0 &&
> > +           fn->nocsr)
> > +               return ~get_helper_reg_mask(fn);
> > +
> > +       return ALL_CALLER_SAVED_REGS;
> > +}
> > +

[...]

> > +static int match_and_mark_nocsr_pattern(struct bpf_verifier_env *env, int t, bool mark)
> > +{
> > +       struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
> > +       struct bpf_subprog_info *subprog;
> > +       u32 csr_mask = call_csr_mask(env, &insns[t]);
> > +       u32 reg_mask = ~csr_mask | ~ALL_CALLER_SAVED_REGS;
> > +       int s, i;
> > +       s16 off;
> > +
> > +       if (csr_mask == ALL_CALLER_SAVED_REGS)
> > +               return false;
> 
> false -> 0 ?

Right.

> 
> > +
> > +       for (i = 1, off = 0; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
> > +               if (t - i < 0 || t + i >= env->prog->len)
> > +                       break;
> > +               stx = &insns[t - i];
> > +               ldx = &insns[t + i];
> > +               if (off == 0) {
> > +                       off = stx->off;
> > +                       if (off % BPF_REG_SIZE != 0)
> > +                               break;
> 

Makes sense.

> > +               }
> > +               if (/* *(u64 *)(r10 - off) = r[0-5]? */
> > +                   stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
> > +                   stx->dst_reg != BPF_REG_10 ||
> > +                   /* r[0-5] = *(u64 *)(r10 - off)? */
> > +                   ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
> > +                   ldx->src_reg != BPF_REG_10 ||
> > +                   /* check spill/fill for the same reg and offset */
> > +                   stx->src_reg != ldx->dst_reg ||
> > +                   stx->off != ldx->off ||
> > +                   stx->off != off ||
> > +                   /* this should be a previously unseen register */
> > +                   BIT(stx->src_reg) & reg_mask)
> > +                       break;
> > +               reg_mask |= BIT(stx->src_reg);
> > +               if (mark) {
> > +                       env->insn_aux_data[t - i].nocsr_pattern = true;
> > +                       env->insn_aux_data[t + i].nocsr_pattern = true;
> > +               }
> > +       }
> > +       if (i == 1)
> > +               return 0;
> > +       if (mark) {
> > +               s = find_containing_subprog(env, t);
> > +               /* can't happen */
> > +               if (WARN_ON_ONCE(s < 0))
> > +                       return 0;
> > +               subprog = &env->subprog_info[s];
> > +               subprog->nocsr_stack_off = min(subprog->nocsr_stack_off, off);
> > +       }
> 
> why not split pattern detection and all this other marking logic? You
> can return "the size of csr pattern", meaning how many spills/fills
> are there surrounding the call, no? Then all the marking can be done
> (if necessary) by the caller.

I'll try recording pattern size for function call,
so no split would be necessary.

> The question is what to do with zero patter (no spills/fills for nocsr
> call, is that valid case?)

Zero pattern -- no spills/fills to remove, so nothing to do.
I will add a test case, but it should be handled fine by the current implementation.

[...]

> > +/* Remove unnecessary spill/fill pairs, members of nocsr pattern.
> > + * Do this as a separate pass to avoid interfering with helper/kfunc
> > + * inlining logic in do_misc_fixups().
> > + * See comment for match_and_mark_nocsr_pattern().
> > + */
> > +static int remove_nocsr_spills_fills(struct bpf_verifier_env *env)
> > +{
> > +       struct bpf_subprog_info *subprogs = env->subprog_info;
> > +       int i, j, spills_num, cur_subprog = 0;
> > +       struct bpf_insn *insn = env->prog->insnsi;
> > +       int insn_cnt = env->prog->len;
> > +
> > +       for (i = 0; i < insn_cnt; i++, insn++) {
> > +               spills_num = match_nocsr_pattern(env, i);
> 
> we can probably afford a single-byte field somewhere in
> bpf_insn_aux_data to remember "csr pattern size" instead of just a
> true/false fact that it is nocsr call. And so we wouldn't need to do
> pattern matching again here, we'll just have all the data.

Makes sense.

[...]
Andrii Nakryiko July 2, 2024, 9:09 p.m. UTC | #4
On Tue, Jul 2, 2024 at 1:38 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Mon, 2024-07-01 at 17:41 -0700, Andrii Nakryiko wrote:
>
> [...]
>

[...]

> > > +static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
> > > +{
> >
> > note that there is now also bpf_jit_inlines_helper_call()
>
> There is.
> I'd like authors of jit inline patches to explicitly opt-in
> for nocsr, vouching that inline patch follows nocsr contract.
> The idea is that people would extend call_csr_mask() as necessary.
>

you are defining a general framework with these changes, though, so
let's introduce a standard and simple way to do this. Say, in addition
to having arch-specific bpf_jit_inlines_helper_call() we can have
bpf_jit_supports_helper_nocsr() or something. And they should be
defined next to each other, so whenever one changes it's easier to
remember to change the other one.

I don't think requiring arm64 contributors to change the code of
call_csr_mask() is the right approach.
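
As a concrete illustration of that suggestion (a sketch only; the name
bpf_jit_supports_helper_nocsr() is the one proposed above, not an
existing kernel function), the pair could look like:

    /* weak default, e.g. in kernel/bpf/core.c */
    bool __weak bpf_jit_supports_helper_nocsr(s32 imm)
    {
            return false;
    }

    /* arch JIT override, defined next to bpf_jit_inlines_helper_call() */
    bool bpf_jit_supports_helper_nocsr(s32 imm)
    {
            /* assumption: only helpers whose inlined sequence clobbers
             * at most r0 would be listed here
             */
            return imm == BPF_FUNC_get_smp_processor_id;
    }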

> >
> >
> > > +       return false;
> > > +}
> > > +
> > > +/* If 'insn' is a call that follows no_caller_saved_registers contract
> > > + * and called function is inlined by current jit, return a mask with
> > > + * 1s corresponding to registers that are scratched by this call
> > > + * (depends on return type and number of return parameters).
> > > + * Otherwise return ALL_CALLER_SAVED_REGS mask.
> > > + */
> > > +static u32 call_csr_mask(struct bpf_verifier_env *env, struct bpf_insn *insn)
> > > +{
> > > +       const struct bpf_func_proto *fn;
> > > +
> > > +       if (bpf_helper_call(insn) &&
> > > +           verifier_inlines_helper_call(env, insn->imm) &&
> >
> > strictly speaking, does nocsr have anything to do with inlining,
> > though? E.g., if we know for sure (however, that's a separate issue)
> > that helper implementation doesn't touch extra registers, why do we
> > need inlining to make use of nocsr?
>
> Technically, alternative for nocsr is for C version of the
> helper/kfunc itself has no_caller_saved_registers attribute.
> Grep shows a single function annotated as such in kernel tree:
> stackleak_track_stack().
> Or, maybe, for helpers/kfuncs implemented in assembly.

Yes, I suppose it's too dangerous to rely on the compiler to not use
some extra register. I guess worst case we can "inline" helper by
keeping call to it intact :)

> Whenever such helpers/kfuncs are added, this function could be extended.
>
> >
> > > +           get_helper_proto(env, insn->imm, &fn) == 0 &&
> > > +           fn->nocsr)
> > > +               return ~get_helper_reg_mask(fn);
> > > +
> > > +       return ALL_CALLER_SAVED_REGS;
> > > +}
> > > +
>
> [...]
>

[...]

> > > +       if (i == 1)
> > > +               return 0;
> > > +       if (mark) {
> > > +               s = find_containing_subprog(env, t);
> > > +               /* can't happen */
> > > +               if (WARN_ON_ONCE(s < 0))
> > > +                       return 0;
> > > +               subprog = &env->subprog_info[s];
> > > +               subprog->nocsr_stack_off = min(subprog->nocsr_stack_off, off);
> > > +       }
> >
> > why not split pattern detection and all this other marking logic? You
> > can return "the size of csr pattern", meaning how many spills/fills
> > are there surrounding the call, no? Then all the marking can be done
> > (if necessary) by the caller.
>
> I'll try recording pattern size for function call,
> so no split would be necessary.
>

ack

> > The question is what to do with zero patter (no spills/fills for nocsr
> > call, is that valid case?)
>
> Zero pattern -- no spills/fills to remove, so nothing to do.
> I will add a test case, but it should be handled by current implementation fine.

yep, thanks

>
> [...]
>
> > > +/* Remove unnecessary spill/fill pairs, members of nocsr pattern.
> > > + * Do this as a separate pass to avoid interfering with helper/kfunc
> > > + * inlining logic in do_misc_fixups().
> > > + * See comment for match_and_mark_nocsr_pattern().
> > > + */
> > > +static int remove_nocsr_spills_fills(struct bpf_verifier_env *env)
> > > +{
> > > +       struct bpf_subprog_info *subprogs = env->subprog_info;
> > > +       int i, j, spills_num, cur_subprog = 0;
> > > +       struct bpf_insn *insn = env->prog->insnsi;
> > > +       int insn_cnt = env->prog->len;
> > > +
> > > +       for (i = 0; i < insn_cnt; i++, insn++) {
> > > +               spills_num = match_nocsr_pattern(env, i);
> >
> > we can probably afford a single-byte field somewhere in
> > bpf_insn_aux_data to remember "csr pattern size" instead of just a
> > true/false fact that it is nocsr call. And so we wouldn't need to do
> > pattern matching again here, we'll just have all the data.
>
> Makes sense.
>
> [...]
Eduard Zingerman July 2, 2024, 9:19 p.m. UTC | #5
On Tue, 2024-07-02 at 14:09 -0700, Andrii Nakryiko wrote:

[...]

> you are defining a general framework with these changes, though, so
> let's introduce a standard and simple way to do this. Say, in addition
> to having arch-specific bpf_jit_inlines_helper_call() we can have
> bpf_jit_supports_helper_nocsr() or something. And they should be
> defined next to each other, so whenever one changes it's easier to
> remember to change the other one.
> 
> I don't think requiring arm64 contributors to change the code of
> call_csr_mask() is the right approach.

I'd change the return value for bpf_jit_inlines_helper_call() to enum,
to avoid naming functions several times.

[...]

> > > strictly speaking, does nocsr have anything to do with inlining,
> > > though? E.g., if we know for sure (however, that's a separate issue)
> > > that helper implementation doesn't touch extra registers, why do we
> > > need inlining to make use of nocsr?
> > 
> > Technically, alternative for nocsr is for C version of the
> > helper/kfunc itself has no_caller_saved_registers attribute.
> > Grep shows a single function annotated as such in kernel tree:
> > stackleak_track_stack().
> > Or, maybe, for helpers/kfuncs implemented in assembly.
> 
> Yes, I suppose it's too dangerous to rely on the compiler to not use
> some extra register. I guess worst case we can "inline" helper by
> keeping call to it intact :)

Something like that.

[...]
Andrii Nakryiko July 2, 2024, 9:22 p.m. UTC | #6
On Tue, Jul 2, 2024 at 2:19 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Tue, 2024-07-02 at 14:09 -0700, Andrii Nakryiko wrote:
>
> [...]
>
> > you are defining a general framework with these changes, though, so
> > let's introduce a standard and simple way to do this. Say, in addition
> > to having arch-specific bpf_jit_inlines_helper_call() we can have
> > bpf_jit_supports_helper_nocsr() or something. And they should be
> > defined next to each other, so whenever one changes it's easier to
> > remember to change the other one.
> >
> > I don't think requiring arm64 contributors to change the code of
> > call_csr_mask() is the right approach.
>
> I'd change the return value for bpf_jit_inlines_helper_call() to enum,
> to avoid naming functions several times.

Hm... I don't know, feels ugly. nocsr is sort of a dependent but
orthogonal axis, so I'd keep it separate. Imagine we add yet another
property that relies on inlining, but is independent of nocsr. I guess
we can have enum be a bit set, but I'd start simple with two
functions.

>
> [...]
>
> > > > strictly speaking, does nocsr have anything to do with inlining,
> > > > though? E.g., if we know for sure (however, that's a separate issue)
> > > > that helper implementation doesn't touch extra registers, why do we
> > > > need inlining to make use of nocsr?
> > >
> > > Technically, alternative for nocsr is for C version of the
> > > helper/kfunc itself has no_caller_saved_registers attribute.
> > > Grep shows a single function annotated as such in kernel tree:
> > > stackleak_track_stack().
> > > Or, maybe, for helpers/kfuncs implemented in assembly.
> >
> > Yes, I suppose it's too dangerous to rely on the compiler to not use
> > some extra register. I guess worst case we can "inline" helper by
> > keeping call to it intact :)
>
> Something like that.
>
> [...]
Puranjay Mohan July 3, 2024, 11:57 a.m. UTC | #7
Eduard Zingerman <eddyz87@gmail.com> writes:

> GCC and LLVM define a no_caller_saved_registers function attribute.
> This attribute means that function scratches only some of
> the caller saved registers defined by ABI.
> For BPF the set of such registers could be defined as follows:
> - R0 is scratched only if function is non-void;
> - R1-R5 are scratched only if corresponding parameter type is defined
>   in the function prototype.
>
> This commit introduces flag bpf_func_prot->nocsr.
> If this flag is set for some helper function, verifier assumes that
> it follows no_caller_saved_registers calling convention.
>
> The contract between kernel and clang allows to simultaneously use
> such functions and maintain backwards compatibility with old
> kernels that don't understand no_caller_saved_registers calls
> (nocsr for short):
>
> - clang generates a simple pattern for nocsr calls, e.g.:
>
>     r1 = 1;
>     r2 = 2;
>     *(u64 *)(r10 - 8)  = r1;
>     *(u64 *)(r10 - 16) = r2;
>     call %[to_be_inlined_by_jit]

The call can be inlined by the verifier (in do_misc_fixups()) or by the
JIT if bpf_jit_inlines_helper_call() returns true for the helper.

>     r2 = *(u64 *)(r10 - 16);
>     r1 = *(u64 *)(r10 - 8);
>     r0 = r1;
>     r0 += r2;
>     exit;
>
> - kernel removes unnecessary spills and fills, if called function is
>   inlined by current JIT (with assumption that patch inserted by JIT
>   honors nocsr contract, e.g. does not scratch r3-r5 for the example
>   above), e.g. the code above would be transformed to:
>
>     r1 = 1;
>     r2 = 2;
>     call %[to_be_inlined_by_jit]

Same as above

>     r0 = r1;
>     r0 += r2;
>     exit;
>

[.....]

> +/* True if do_misc_fixups() replaces calls to helper number 'imm',
> + * replacement patch is presumed to follow no_caller_saved_registers contract
> + * (see match_and_mark_nocsr_pattern() below).
> + */
> +static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
> +{
> +	return false;
> +}
> +
> +/* If 'insn' is a call that follows no_caller_saved_registers contract
> + * and called function is inlined by current jit, return a mask with

We should update the comment to: inlined by the verifier or by the
current JIT.

> + * 1s corresponding to registers that are scratched by this call
> + * (depends on return type and number of return parameters).
> + * Otherwise return ALL_CALLER_SAVED_REGS mask.
> + */
> +static u32 call_csr_mask(struct bpf_verifier_env *env, struct bpf_insn *insn)
> +{
> +	const struct bpf_func_proto *fn;
> +
> +	if (bpf_helper_call(insn) &&
> +	    verifier_inlines_helper_call(env, insn->imm) &&

This should also check bpf_jit_inlines_helper_call(insn->imm) as the JIT
can also inline helper calls separately from the verifier.

    if (bpf_helper_call(insn) &&
        (verifier_inlines_helper_call(env, insn->imm) || bpf_jit_inlines_helper_call(insn->imm)) &&

This is currently being done by the arm64 and risc-v JITs and they don't
scratch any register except R0 (The helpers inlined by these JITs are
non-void).

> +	    get_helper_proto(env, insn->imm, &fn) == 0 &&
> +	    fn->nocsr)
> +		return ~get_helper_reg_mask(fn);
> +
> +	return ALL_CALLER_SAVED_REGS;
> +}
> +
> +/* GCC and LLVM define a no_caller_saved_registers function attribute.
> + * This attribute means that function scratches only some of
> + * the caller saved registers defined by ABI.
> + * For BPF the set of such registers could be defined as follows:
> + * - R0 is scratched only if function is non-void;
> + * - R1-R5 are scratched only if corresponding parameter type is defined
> + *   in the function prototype.
> + *
> + * The contract between kernel and clang allows to simultaneously use
> + * such functions and maintain backwards compatibility with old
> + * kernels that don't understand no_caller_saved_registers calls
> + * (nocsr for short):
> + *
> + * - for nocsr calls clang allocates registers as-if relevant r0-r5
> + *   registers are not scratched by the call;
> + *
> + * - as a post-processing step, clang visits each nocsr call and adds
> + *   spill/fill for every live r0-r5;
> + *
> + * - stack offsets used for the spill/fill are allocated as minimal
> + *   stack offsets in whole function and are not used for any other
> + *   purposes;
> + *
> + * - when kernel loads a program, it looks for such patterns
> + *   (nocsr function surrounded by spills/fills) and checks if
> + *   spill/fill stack offsets are used exclusively in nocsr patterns;
> + *
> + * - if so, and if current JIT inlines the call to the nocsr function

We should update the comment to: if the verifier or the current JIT.

> + *   (e.g. a helper call), kernel removes unnecessary spill/fill pairs;
> + *
> + * - when old kernel loads a program, presence of spill/fill pairs
> + *   keeps BPF program valid, albeit slightly less efficient.
> + *
> + * For example:
> + *
> + *   r1 = 1;
> + *   r2 = 2;
> + *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
> + *   *(u64 *)(r10 - 16) = r2;            r2 = 2;
> + *   call %[to_be_inlined_by_jit]  -->   call %[to_be_inlined_by_jit]

We should update this to say: to_be_inlined_by_verifier_or_jit

> + *   r2 = *(u64 *)(r10 - 16);            r0 = r1;
> + *   r1 = *(u64 *)(r10 - 8);             r0 += r2;
> + *   r0 = r1;                            exit;
> + *   r0 += r2;
> + *   exit;
> + *
> + * The purpose of match_and_mark_nocsr_pattern is to:
> + * - look for such patterns;
> + * - mark spill and fill instructions in env->insn_aux_data[*].nocsr_pattern;
> + * - update env->subprog_info[*]->nocsr_stack_off to find an offset
> + *   at which nocsr spill/fill stack slots start.
> + *
> + * The .nocsr_pattern and .nocsr_stack_off are used by
> + * check_nocsr_stack_contract() to check if every stack access to
> + * nocsr spill/fill stack slot originates from spill/fill
> + * instructions, members of nocsr patterns.
> + *
> + * If such condition holds true for a subprogram, nocsr patterns could
> + * be rewritten by remove_nocsr_spills_fills().
> + * Otherwise nocsr patterns are not changed in the subprogram
> + * (code, presumably, generated by an older clang version).
> + *
> + * For example, it is *not* safe to remove spill/fill below:
> + *
> + *   r1 = 1;
> + *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
> + *   call %[to_be_inlined_by_jit]  -->   call %[to_be_inlined_by_jit]

Same as above.

Thanks for working on this feature.

Most of my comments except for bpf_jit_inlines_helper_call() are nit
picks regarding the comments or the commit message, but as the verifier
is already complicated, I feel it is useful to have extremely accurate
comments/commit messages for new contributors.

Thanks,
Puranjay
Eduard Zingerman July 3, 2024, 4:13 p.m. UTC | #8
On Wed, 2024-07-03 at 11:57 +0000, Puranjay Mohan wrote:

[...]

> > +static u32 call_csr_mask(struct bpf_verifier_env *env, struct bpf_insn *insn)
> > +{
> > +	const struct bpf_func_proto *fn;
> > +
> > +	if (bpf_helper_call(insn) &&
> > +	    verifier_inlines_helper_call(env, insn->imm) &&
> 
> This should also check bpf_jit_inlines_helper_call(insn->imm) as the JIT
> can also inline helper calls separately from the verifier.
> 
>     if (bpf_helper_call(insn) &&
>         (verifier_inlines_helper_call(env, insn->imm) || bpf_jit_inlines_helper_call(insn->imm)) &&
> 
> This is currently being done by the arm64 and risc-v JITs and they don't
> scratch any register except R0 (The helpers inlined by these JITs are
> non-void).

Hello Puranjay, thank you for commenting.
In a sibling email Andrii suggested also adding a function like the one below:

    __weak bool bpf_jit_supports_helper_nocsr(s32)

At the moment I see the following helpers inlined by jits:
- arm64:
  - BPF_FUNC_get_smp_processor_id
  - BPF_FUNC_get_current_task
  - BPF_FUNC_get_current_task_btf
- riscv:
  - BPF_FUNC_get_smp_processor_id

I suspect (but need to double-check) that all of these patches conform
to nocsr. If so, what are your thoughts regarding bpf_jit_supports_helper_nocsr():
do we need it, or should inlining imply nocsr?

Thanks,
Eduard

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 960780ef04e1..f4faa6b8cb5b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -807,6 +807,12 @@  struct bpf_func_proto {
 	bool gpl_only;
 	bool pkt_access;
 	bool might_sleep;
+	/* set to true if helper follows contract for gcc/llvm
+	 * attribute no_caller_saved_registers:
+	 * - void functions do not scratch r0
+	 * - functions taking N arguments scratch only registers r1-rN
+	 */
+	bool nocsr;
 	enum bpf_return_type ret_type;
 	union {
 		struct {
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 2b54e25d2364..67dbe4cb1529 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -585,6 +585,10 @@  struct bpf_insn_aux_data {
 	 * accepts callback function as a parameter.
 	 */
 	bool calls_callback;
+	/* true if STX or LDX instruction is a part of a spill/fill
+	 * pattern for a no_caller_saved_registers call.
+	 */
+	bool nocsr_pattern;
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -641,6 +645,11 @@  struct bpf_subprog_info {
 	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
 	u16 stack_depth; /* max. stack depth used by this function */
 	u16 stack_extra;
+	/* stack depth after which slots reserved for
+	 * no_caller_saved_registers spills/fills start,
+	 * value <= nocsr_stack_off belongs to the spill/fill area.
+	 */
+	s16 nocsr_stack_off;
 	bool has_tail_call: 1;
 	bool tail_call_reachable: 1;
 	bool has_ld_abs: 1;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8dd3385cf925..1340d3e60d30 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2471,16 +2471,41 @@  static int cmp_subprogs(const void *a, const void *b)
 	       ((struct bpf_subprog_info *)b)->start;
 }
 
-static int find_subprog(struct bpf_verifier_env *env, int off)
+/* Find subprogram that contains instruction at 'off' */
+static int find_containing_subprog(struct bpf_verifier_env *env, int off)
 {
-	struct bpf_subprog_info *p;
+	struct bpf_subprog_info *vals = env->subprog_info;
+	int high = env->subprog_cnt - 1;
+	int low = 0, ret = -ENOENT;
 
-	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
-		    sizeof(env->subprog_info[0]), cmp_subprogs);
-	if (!p)
+	if (off >= env->prog->len || off < 0)
 		return -ENOENT;
-	return p - env->subprog_info;
 
+	while (low <= high) {
+		int mid = (low + high)/2;
+		struct bpf_subprog_info *val = &vals[mid];
+		int diff = off - val->start;
+
+		if (diff < 0) {
+			high = mid - 1;
+		} else {
+			low = mid + 1;
+			/* remember last time mid.start <= off */
+			ret = mid;
+		}
+	}
+	return ret;
+}
+
+/* Find subprogram that starts exactly at 'off' */
+static int find_subprog(struct bpf_verifier_env *env, int off)
+{
+	int idx;
+
+	idx = find_containing_subprog(env, off);
+	if (idx < 0 || env->subprog_info[idx].start != off)
+		return -ENOENT;
+	return idx;
 }
 
 static int add_subprog(struct bpf_verifier_env *env, int off)
@@ -4501,6 +4526,23 @@  static int get_reg_width(struct bpf_reg_state *reg)
 	return fls64(reg->umax_value);
 }
 
+/* See comment for match_and_mark_nocsr_pattern() */
+static void check_nocsr_stack_contract(struct bpf_verifier_env *env, struct bpf_func_state *state,
+				       int insn_idx, int off)
+{
+	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
+	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+
+	if (subprog->nocsr_stack_off <= off || aux->nocsr_pattern)
+		return;
+	/* access to the region [max_stack_depth .. nocsr_stack_off]
+	 * from something that is not a part of the nocsr pattern,
+	 * disable nocsr rewrites for current subprogram by setting
+	 * nocsr_stack_off to a value smaller than any possible offset.
+	 */
+	subprog->nocsr_stack_off = S16_MIN;
+}
+
 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
  * stack boundary and alignment are checked in check_mem_access()
  */
@@ -4549,6 +4591,7 @@  static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	if (err)
 		return err;
 
+	check_nocsr_stack_contract(env, state, insn_idx, off);
 	mark_stack_slot_scratched(env, spi);
 	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
 		bool reg_value_fits;
@@ -4682,6 +4725,7 @@  static int check_stack_write_var_off(struct bpf_verifier_env *env,
 			return err;
 	}
 
+	check_nocsr_stack_contract(env, state, insn_idx, min_off);
 	/* Variable offset writes destroy any spilled pointers in range. */
 	for (i = min_off; i < max_off; i++) {
 		u8 new_type, *stype;
@@ -4820,6 +4864,7 @@  static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 	reg = &reg_state->stack[spi].spilled_ptr;
 
 	mark_stack_slot_scratched(env, spi);
+	check_nocsr_stack_contract(env, state, env->insn_idx, off);
 
 	if (is_spilled_reg(&reg_state->stack[spi])) {
 		u8 spill_size = 1;
@@ -4980,6 +5025,7 @@  static int check_stack_read_var_off(struct bpf_verifier_env *env,
 	min_off = reg->smin_value + off;
 	max_off = reg->smax_value + off;
 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
+	check_nocsr_stack_contract(env, ptr_state, env->insn_idx, min_off);
 	return 0;
 }
 
@@ -15950,6 +15996,205 @@  static int visit_func_call_insn(int t, struct bpf_insn *insns,
 	return ret;
 }
 
+/* Bitmask with 1s for all caller saved registers */
+#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
+
+/* Return a bitmask specifying which caller saved registers are
+ * modified by a call to a helper.
+ * (Either as a return value or as scratch registers).
+ *
+ * For normal helpers registers R0-R5 are scratched.
+ * For helpers marked as no_csr:
+ * - scratch R0 if function is non-void;
+ * - scratch R1-R5 if corresponding parameter type is set
+ *   in the function prototype.
+ */
+static u8 get_helper_reg_mask(const struct bpf_func_proto *fn)
+{
+	u8 mask;
+	int i;
+
+	if (!fn->nocsr)
+		return ALL_CALLER_SAVED_REGS;
+
+	mask = 0;
+	mask |= fn->ret_type == RET_VOID ? 0 : BIT(BPF_REG_0);
+	for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i)
+		mask |= fn->arg_type[i] == ARG_DONTCARE ? 0 : BIT(BPF_REG_1 + i);
+	return mask;
+}
+
+/* True if do_misc_fixups() replaces calls to helper number 'imm',
+ * replacement patch is presumed to follow no_caller_saved_registers contract
+ * (see match_and_mark_nocsr_pattern() below).
+ */
+static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
+{
+	return false;
+}
+
+/* If 'insn' is a call that follows no_caller_saved_registers contract
+ * and called function is inlined by current jit, return a mask with
+ * 1s corresponding to registers that are scratched by this call
+ * (depends on return type and number of return parameters).
+ * Otherwise return ALL_CALLER_SAVED_REGS mask.
+ */
+static u32 call_csr_mask(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+	const struct bpf_func_proto *fn;
+
+	if (bpf_helper_call(insn) &&
+	    verifier_inlines_helper_call(env, insn->imm) &&
+	    get_helper_proto(env, insn->imm, &fn) == 0 &&
+	    fn->nocsr)
+		return ~get_helper_reg_mask(fn);
+
+	return ALL_CALLER_SAVED_REGS;
+}
+
+/* GCC and LLVM define a no_caller_saved_registers function attribute.
+ * This attribute means that function scratches only some of
+ * the caller saved registers defined by ABI.
+ * For BPF the set of such registers could be defined as follows:
+ * - R0 is scratched only if function is non-void;
+ * - R1-R5 are scratched only if corresponding parameter type is defined
+ *   in the function prototype.
+ *
+ * The contract between kernel and clang allows to simultaneously use
+ * such functions and maintain backwards compatibility with old
+ * kernels that don't understand no_caller_saved_registers calls
+ * (nocsr for short):
+ *
+ * - for nocsr calls clang allocates registers as-if relevant r0-r5
+ *   registers are not scratched by the call;
+ *
+ * - as a post-processing step, clang visits each nocsr call and adds
+ *   spill/fill for every live r0-r5;
+ *
+ * - stack offsets used for the spill/fill are allocated as minimal
+ *   stack offsets in whole function and are not used for any other
+ *   purposes;
+ *
+ * - when kernel loads a program, it looks for such patterns
+ *   (nocsr function surrounded by spills/fills) and checks if
+ *   spill/fill stack offsets are used exclusively in nocsr patterns;
+ *
+ * - if so, and if current JIT inlines the call to the nocsr function
+ *   (e.g. a helper call), kernel removes unnecessary spill/fill pairs;
+ *
+ * - when old kernel loads a program, presence of spill/fill pairs
+ *   keeps BPF program valid, albeit slightly less efficient.
+ *
+ * For example:
+ *
+ *   r1 = 1;
+ *   r2 = 2;
+ *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
+ *   *(u64 *)(r10 - 16) = r2;            r2 = 2;
+ *   call %[to_be_inlined_by_jit]  -->   call %[to_be_inlined_by_jit]
+ *   r2 = *(u64 *)(r10 - 16);            r0 = r1;
+ *   r1 = *(u64 *)(r10 - 8);             r0 += r2;
+ *   r0 = r1;                            exit;
+ *   r0 += r2;
+ *   exit;
+ *
+ * The purpose of match_and_mark_nocsr_pattern is to:
+ * - look for such patterns;
+ * - mark spill and fill instructions in env->insn_aux_data[*].nocsr_pattern;
+ * - update env->subprog_info[*]->nocsr_stack_off to find an offset
+ *   at which nocsr spill/fill stack slots start.
+ *
+ * The .nocsr_pattern and .nocsr_stack_off are used by
+ * check_nocsr_stack_contract() to check if every stack access to
+ * nocsr spill/fill stack slot originates from spill/fill
+ * instructions, members of nocsr patterns.
+ *
+ * If such condition holds true for a subprogram, nocsr patterns could
+ * be rewritten by remove_nocsr_spills_fills().
+ * Otherwise nocsr patterns are not changed in the subprogram
+ * (code, presumably, generated by an older clang version).
+ *
+ * For example, it is *not* safe to remove spill/fill below:
+ *
+ *   r1 = 1;
+ *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
+ *   call %[to_be_inlined_by_jit]  -->   call %[to_be_inlined_by_jit]
+ *   r1 = *(u64 *)(r10 - 8);             r0 = *(u64 *)(r10 - 8);  <---- wrong !!!
+ *   r0 = *(u64 *)(r10 - 8);             r0 += r1;
+ *   r0 += r1;                           exit;
+ *   exit;
+ */
+static int match_and_mark_nocsr_pattern(struct bpf_verifier_env *env, int t, bool mark)
+{
+	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
+	struct bpf_subprog_info *subprog;
+	u32 csr_mask = call_csr_mask(env, &insns[t]);
+	u32 reg_mask = ~csr_mask | ~ALL_CALLER_SAVED_REGS;
+	int s, i;
+	s16 off;
+
+	if (csr_mask == ALL_CALLER_SAVED_REGS)
+		return false;
+
+	for (i = 1, off = 0; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
+		if (t - i < 0 || t + i >= env->prog->len)
+			break;
+		stx = &insns[t - i];
+		ldx = &insns[t + i];
+		if (off == 0) {
+			off = stx->off;
+			if (off % BPF_REG_SIZE != 0)
+				break;
+		}
+		if (/* *(u64 *)(r10 - off) = r[0-5]? */
+		    stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
+		    stx->dst_reg != BPF_REG_10 ||
+		    /* r[0-5] = *(u64 *)(r10 - off)? */
+		    ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
+		    ldx->src_reg != BPF_REG_10 ||
+		    /* check spill/fill for the same reg and offset */
+		    stx->src_reg != ldx->dst_reg ||
+		    stx->off != ldx->off ||
+		    stx->off != off ||
+		    /* this should be a previously unseen register */
+		    BIT(stx->src_reg) & reg_mask)
+			break;
+		reg_mask |= BIT(stx->src_reg);
+		if (mark) {
+			env->insn_aux_data[t - i].nocsr_pattern = true;
+			env->insn_aux_data[t + i].nocsr_pattern = true;
+		}
+	}
+	if (i == 1)
+		return 0;
+	if (mark) {
+		s = find_containing_subprog(env, t);
+		/* can't happen */
+		if (WARN_ON_ONCE(s < 0))
+			return 0;
+		subprog = &env->subprog_info[s];
+		subprog->nocsr_stack_off = min(subprog->nocsr_stack_off, off);
+	}
+	return i - 1;
+}
+
+/* If instruction 't' is a nocsr call surrounded by spill/fill pairs,
+ * update env->subprog_info[_]->nocsr_stack_off and
+ * env->insn_aux_data[_].nocsr_pattern fields.
+ */
+static void update_nocsr_pattern_marks(struct bpf_verifier_env *env, int t)
+{
+	match_and_mark_nocsr_pattern(env, t, true);
+}
+
+/* If instruction 't' is a nocsr call surrounded by spill/fill pairs,
+ * return the number of such pairs.
+ */
+static int match_nocsr_pattern(struct bpf_verifier_env *env, int t)
+{
+	return match_and_mark_nocsr_pattern(env, t, false);
+}
+
 /* Visits the instruction at index t and returns one of the following:
  *  < 0 - an error occurred
  *  DONE_EXPLORING - the instruction was fully explored
@@ -16017,6 +16262,8 @@  static int visit_insn(int t, struct bpf_verifier_env *env)
 				mark_force_checkpoint(env, t);
 			}
 		}
+		if (insn->src_reg == 0)
+			update_nocsr_pattern_marks(env, t);
 		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
 
 	case BPF_JA:
@@ -19063,15 +19310,16 @@  static int opt_remove_dead_code(struct bpf_verifier_env *env)
 	return 0;
 }
 
+static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+
 static int opt_remove_nops(struct bpf_verifier_env *env)
 {
-	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
 	int i, err;
 
 	for (i = 0; i < insn_cnt; i++) {
-		if (memcmp(&insn[i], &ja, sizeof(ja)))
+		if (memcmp(&insn[i], &NOP, sizeof(NOP)))
 			continue;
 
 		err = verifier_remove_insns(env, i, 1);
@@ -20801,6 +21049,39 @@  static int optimize_bpf_loop(struct bpf_verifier_env *env)
 	return 0;
 }
 
+/* Remove unnecessary spill/fill pairs, members of nocsr pattern.
+ * Do this as a separate pass to avoid interfering with helper/kfunc
+ * inlining logic in do_misc_fixups().
+ * See comment for match_and_mark_nocsr_pattern().
+ */
+static int remove_nocsr_spills_fills(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *subprogs = env->subprog_info;
+	int i, j, spills_num, cur_subprog = 0;
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		spills_num = match_nocsr_pattern(env, i);
+		if (spills_num == 0)
+			goto next_insn;
+		for (j = 1; j <= spills_num; ++j)
+			if ((insn - j)->off >= subprogs[cur_subprog].nocsr_stack_off ||
+			    (insn + j)->off >= subprogs[cur_subprog].nocsr_stack_off)
+				goto next_insn;
+		/* NOPs are removed by opt_remove_nops() later */
+		for (j = 1; j <= spills_num; ++j) {
+			*(insn - j) = NOP;
+			*(insn + j) = NOP;
+		}
+
+next_insn:
+		if (subprogs[cur_subprog + 1].start == i + 1)
+			cur_subprog++;
+	}
+	return 0;
+}
+
 static void free_states(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state_list *sl, *sln;
@@ -21719,6 +22000,9 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret == 0)
 		ret = optimize_bpf_loop(env);
 
+	if (ret == 0)
+		ret = remove_nocsr_spills_fills(env);
+
 	if (is_priv) {
 		if (ret == 0)
 			opt_hard_wire_dead_code_branches(env);