
[bpf-next,v2,1/2] bpf: Fix a sdiv overflow issue

Message ID 20240912035945.667426-1-yonghong.song@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Series [bpf-next,v2,1/2] bpf: Fix a sdiv overflow issue

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 16 this patch: 16
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 8 maintainers not CCed: sdf@fomichev.me eddyz87@gmail.com haoluo@google.com jolsa@kernel.org song@kernel.org kpsingh@kernel.org martin.lau@linux.dev john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 16 this patch: 16
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 26 this patch: 26
netdev/checkpatch warning CHECK: multiple assignments should be avoided; WARNING: line length of 83 exceeds 80 columns; WARNING: line length of 84 exceeds 80 columns; WARNING: line length of 85 exceeds 80 columns; WARNING: line length of 86 exceeds 80 columns; WARNING: line length of 87 exceeds 80 columns; WARNING: line length of 88 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Yonghong Song Sept. 12, 2024, 3:59 a.m. UTC
Zac Ecob reported a problem [1] where a bpf program may cause a kernel
crash with the following error:
  Oops: divide error: 0000 [#1] PREEMPT SMP KASAN PTI

The failure is due to the following signed divide:
  LLONG_MIN/-1, where LLONG_MIN equals -9,223,372,036,854,775,808.
Mathematically, LLONG_MIN/-1 should give the positive number
9,223,372,036,854,775,808, but that value cannot be represented: the
maximum positive 64-bit signed number is 9,223,372,036,854,775,807.
On x86_64, LLONG_MIN/-1 causes a kernel exception. On arm64, the
result of LLONG_MIN/-1 is LLONG_MIN.

Further investigation found that all of the following sdiv/smod cases
may trigger an exception when a bpf program runs on the x86_64 platform:
  - LLONG_MIN/-1 for a 64-bit operation
  - INT_MIN/-1 for a 32-bit operation
  - LLONG_MIN%-1 for a 64-bit operation
  - INT_MIN%-1 for a 32-bit operation
where -1 can be an immediate or in a register.

On arm64, there are no exceptions:
  - LLONG_MIN/-1 = LLONG_MIN
  - INT_MIN/-1 = INT_MIN
  - LLONG_MIN%-1 = 0
  - INT_MIN%-1 = 0
where -1 can be an immediate or in a register.

Insn patching is needed to handle the above cases, and the patched
code produces results aligned with the arm64 results above.
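
To illustrate the boundary condition, here is a small userspace C
sketch (not part of the patch; the actual divisions are left commented
out because they raise a divide error on x86_64):

  #include <limits.h>
  #include <stdio.h>

  int main(void)
  {
          long long min = LLONG_MIN;   /* -9,223,372,036,854,775,808 */

          /* Two's-complement negation wraps around, which is exactly
           * the arm64 (and BPF_NEG) result: -LLONG_MIN == LLONG_MIN.
           */
          printf("-LLONG_MIN wraps to %lld\n",
                 (long long)-(unsigned long long)min);

          /* long long r = min / -1;   divide error (#DE) on x86_64 */
          /* long long m = min % -1;   same exception on x86_64     */
          return 0;
  }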

  [1] https://lore.kernel.org/bpf/tPJLTEh7S_DxFEqAI2Ji5MBSoZVg7_G-Py2iaZpAaWtM961fFTWtsnlzwvTbzBzaUzwQAoNATXKUlt0LZOFgnDcIyKCswAnAGdUF3LBrhGQ=@protonmail.com/

Reported-by: Zac Ecob <zacecob@protonmail.com>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 kernel/bpf/verifier.c | 84 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 80 insertions(+), 4 deletions(-)

Changelogs:
  v1 -> v2:
    - Handle more crash cases, such as 32-bit operations and modulo operations.
    - Add more tests to cover the new cases.

Comments

Andrii Nakryiko Sept. 12, 2024, 6:17 p.m. UTC | #1
On Wed, Sep 11, 2024 at 9:00 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>
> Zac Ecob reported a problem where a bpf program may cause kernel crash due
> to the following error:
>   Oops: divide error: 0000 [#1] PREEMPT SMP KASAN PTI
>
> The failure is due to the below signed divide:
>   LLONG_MIN/-1 where LLONG_MIN equals to -9,223,372,036,854,775,808.
> LLONG_MIN/-1 is supposed to give a positive number 9,223,372,036,854,775,808,
> but it is impossible since for 64-bit system, the maximum positive
> number is 9,223,372,036,854,775,807. On x86_64, LLONG_MIN/-1 will
> cause a kernel exception. On arm64, the result for LLONG_MIN/-1 is
> LLONG_MIN.
>
> Further investigation found all the following sdiv/smod cases may trigger
> an exception when bpf program is running on x86_64 platform:
>   - LLONG_MIN/-1 for 64bit operation
>   - INT_MIN/-1 for 32bit operation
>   - LLONG_MIN%-1 for 64bit operation
>   - INT_MIN%-1 for 32bit operation
> where -1 can be an immediate or in a register.
>
> On arm64, there are no exceptions:
>   - LLONG_MIN/-1 = LLONG_MIN
>   - INT_MIN/-1 = INT_MIN
>   - LLONG_MIN%-1 = 0
>   - INT_MIN%-1 = 0
> where -1 can be an immediate or in a register.
>
> Insn patching is needed to handle the above cases and the patched codes
> produced results aligned with above arm64 result.
>
>   [1] https://lore.kernel.org/bpf/tPJLTEh7S_DxFEqAI2Ji5MBSoZVg7_G-Py2iaZpAaWtM961fFTWtsnlzwvTbzBzaUzwQAoNATXKUlt0LZOFgnDcIyKCswAnAGdUF3LBrhGQ=@protonmail.com/
>
> Reported-by: Zac Ecob <zacecob@protonmail.com>
> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> ---
>  kernel/bpf/verifier.c | 84 ++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 80 insertions(+), 4 deletions(-)
>
> Changelogs:
>   v1 -> v2:
>     - Handle more crash cases like 32bit operation and modules.
>     - Add more tests to test new cases.
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index f35b80c16cda..ad7f51302c70 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20499,13 +20499,46 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>                         /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
>                         insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
>
> -               /* Make divide-by-zero exceptions impossible. */
> +               /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
> +               if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
> +                    insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
> +                    insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
> +                    insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
> +                   insn->off == 1 && insn->imm == -1) {
> +                       bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
> +                       bool isdiv = BPF_OP(insn->code) == BPF_DIV;
> +                       struct bpf_insn *patchlet;
> +                       struct bpf_insn chk_and_div[] = {
> +                               BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
> +                                            BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
> +                                            0, 0, 0),
> +                       };
> +                       struct bpf_insn chk_and_mod[] = {
> +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
> +                       };
> +
> +                       patchlet = isdiv ? chk_and_div : chk_and_mod;

nit: "chk_and_" part in the name is misleading, it's more like
"safe_div" and "safe_mod". Oh, and it's "sdiv" and "smod" specific, so
probably not a bad idea to have that in the name as well.

> +                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) : ARRAY_SIZE(chk_and_mod);
> +
> +                       new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
> +                       if (!new_prog)
> +                               return -ENOMEM;
> +
> +                       delta    += cnt - 1;
> +                       env->prog = prog = new_prog;
> +                       insn      = new_prog->insnsi + i + delta;
> +                       goto next_insn;
> +               }
> +
> +               /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
>                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
>                     insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
>                     insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
>                     insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
>                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
>                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
> +                       bool is_sdiv = isdiv && insn->off == 1;
> +                       bool is_smod = !isdiv && insn->off == 1;
>                         struct bpf_insn *patchlet;
>                         struct bpf_insn chk_and_div[] = {
>                                 /* [R,W]x div 0 -> 0 */
> @@ -20525,10 +20558,53 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
>                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
>                         };
> +                       struct bpf_insn chk_and_sdiv[] = {
> +                               /* [R,W]x sdiv 0 -> 0 */
> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> +                                            BPF_JNE | BPF_K, insn->src_reg,
> +                                            0, 2, 0),
> +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 4),
> +                               /* LLONG_MIN sdiv -1 -> LLONG_MIN
> +                                * INT_MIN sdiv -1 -> INT_MIN
> +                                */
> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> +                                            BPF_JNE | BPF_K, insn->src_reg,
> +                                            0, 2, -1),
> +                               /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
> +                               BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
> +                                            BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
> +                                            0, 0, 0),
> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 1),

I don't know how much it actually matters, but it feels like common
safe case should be as straight-line-executed as possible, no?

So maybe it's better to rearrange to roughly this (where rX is the
divisor register):

    if rX == 0 goto L1
    if rX == -1 goto L2
    rY /= rX
    goto L3
L1: /* zero case */
    rY = 0 /* fallthrough, negation doesn't hurt, but less jumping */
L2: /* negative one case (or zero) */
    rY = -rY
L3:
    ... the rest of the program code ...


Those two branches for common case are still annoyingly inefficient, I
wonder if we should do

    rX += 1 /* [-1, 0] -> [0, 1] */
    if rX <=(unsigned) 1 goto L1
    rX -= 1 /* restore original divisor */
    rY /= rX /* common case */
    goto L3
L1:
    if rX == 0 goto L2 /* jump if originally -1 */
    rY = 0 /* division by zero case */
L2: /* fallthrough */
    rY = -rY
    rX -= 1 /* restore original divisor */
L3:
    ... continue with the rest ...


It's a bit trickier to follow, but should be faster in the common case.

WDYT? Too much too far?
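
Quick sanity check of the +1 trick in plain C (illustrative only,
nothing from the patch):

  #include <stdio.h>

  int main(void)
  {
          long long divisors[] = { -5, -1, 0, 1, 42 };
          int n = sizeof(divisors) / sizeof(divisors[0]);

          for (int i = 0; i < n; i++) {
                  long long x = divisors[i];
                  /* (unsigned)(x + 1) <= 1 holds exactly for x == -1 and x == 0 */
                  int special = (unsigned long long)(x + 1) <= 1;

                  printf("%3lld -> %s\n", x, special ? "special" : "common");
          }
          return 0;
  }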


> +                               *insn,
> +                       };
> +                       struct bpf_insn chk_and_smod[] = {
> +                               /* [R,W]x mod 0 -> [R,W]x */
> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> +                                            BPF_JNE | BPF_K, insn->src_reg,
> +                                            0, 2, 0),
> +                               BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 4),
> +                               /* [R,W]x mod -1 -> 0 */
> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> +                                            BPF_JNE | BPF_K, insn->src_reg,
> +                                            0, 2, -1),
> +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 1),
> +                               *insn,
> +                       };
>

Same idea here, keep the common case as straight as possible.

> -                       patchlet = isdiv ? chk_and_div : chk_and_mod;
> -                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
> -                                     ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
> +                       if (is_sdiv) {
> +                               patchlet = chk_and_sdiv;
> +                               cnt = ARRAY_SIZE(chk_and_sdiv);
> +                       } else if (is_smod) {
> +                               patchlet = chk_and_smod;
> +                               cnt = ARRAY_SIZE(chk_and_smod);
> +                       } else {
> +                               patchlet = isdiv ? chk_and_div : chk_and_mod;
> +                               cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
> +                                             ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
> +                       }
>
>                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
>                         if (!new_prog)
> --
> 2.43.5
>
Andrii Nakryiko Sept. 12, 2024, 6:19 p.m. UTC | #2
On Thu, Sep 12, 2024 at 11:17 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Sep 11, 2024 at 9:00 PM Yonghong Song <yonghong.song@linux.dev> wrote:
> >
> > Zac Ecob reported a problem where a bpf program may cause kernel crash due
> > to the following error:
> >   Oops: divide error: 0000 [#1] PREEMPT SMP KASAN PTI
> >
> > The failure is due to the below signed divide:
> >   LLONG_MIN/-1 where LLONG_MIN equals to -9,223,372,036,854,775,808.
> > LLONG_MIN/-1 is supposed to give a positive number 9,223,372,036,854,775,808,
> > but it is impossible since for 64-bit system, the maximum positive
> > number is 9,223,372,036,854,775,807. On x86_64, LLONG_MIN/-1 will
> > cause a kernel exception. On arm64, the result for LLONG_MIN/-1 is
> > LLONG_MIN.
> >
> > Further investigation found all the following sdiv/smod cases may trigger
> > an exception when bpf program is running on x86_64 platform:
> >   - LLONG_MIN/-1 for 64bit operation
> >   - INT_MIN/-1 for 32bit operation
> >   - LLONG_MIN%-1 for 64bit operation
> >   - INT_MIN%-1 for 32bit operation
> > where -1 can be an immediate or in a register.
> >
> > On arm64, there are no exceptions:
> >   - LLONG_MIN/-1 = LLONG_MIN
> >   - INT_MIN/-1 = INT_MIN
> >   - LLONG_MIN%-1 = 0
> >   - INT_MIN%-1 = 0
> > where -1 can be an immediate or in a register.
> >
> > Insn patching is needed to handle the above cases and the patched codes
> > produced results aligned with above arm64 result.
> >
> >   [1] https://lore.kernel.org/bpf/tPJLTEh7S_DxFEqAI2Ji5MBSoZVg7_G-Py2iaZpAaWtM961fFTWtsnlzwvTbzBzaUzwQAoNATXKUlt0LZOFgnDcIyKCswAnAGdUF3LBrhGQ=@protonmail.com/
> >
> > Reported-by: Zac Ecob <zacecob@protonmail.com>
> > Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> > ---
> >  kernel/bpf/verifier.c | 84 ++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 80 insertions(+), 4 deletions(-)
> >
> > Changelogs:
> >   v1 -> v2:
> >     - Handle more crash cases like 32bit operation and modules.
> >     - Add more tests to test new cases.
> >
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index f35b80c16cda..ad7f51302c70 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -20499,13 +20499,46 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
> >                         /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
> >                         insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
> >
> > -               /* Make divide-by-zero exceptions impossible. */
> > +               /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
> > +               if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
> > +                    insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
> > +                    insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
> > +                    insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
> > +                   insn->off == 1 && insn->imm == -1) {
> > +                       bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
> > +                       bool isdiv = BPF_OP(insn->code) == BPF_DIV;
> > +                       struct bpf_insn *patchlet;
> > +                       struct bpf_insn chk_and_div[] = {
> > +                               BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
> > +                                            BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
> > +                                            0, 0, 0),
> > +                       };
> > +                       struct bpf_insn chk_and_mod[] = {
> > +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
> > +                       };
> > +
> > +                       patchlet = isdiv ? chk_and_div : chk_and_mod;
>
> nit: "chk_and_" part in the name is misleading, it's more like
> "safe_div" and "safe_mod". Oh, and it's "sdiv" and "smod" specific, so
> probably not a bad idea to have that in the name as well.
>
> > +                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) : ARRAY_SIZE(chk_and_mod);
> > +
> > +                       new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
> > +                       if (!new_prog)
> > +                               return -ENOMEM;
> > +
> > +                       delta    += cnt - 1;
> > +                       env->prog = prog = new_prog;
> > +                       insn      = new_prog->insnsi + i + delta;
> > +                       goto next_insn;
> > +               }
> > +
> > +               /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
> >                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
> >                     insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
> >                     insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
> >                     insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
> >                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
> >                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
> > +                       bool is_sdiv = isdiv && insn->off == 1;
> > +                       bool is_smod = !isdiv && insn->off == 1;
> >                         struct bpf_insn *patchlet;
> >                         struct bpf_insn chk_and_div[] = {
> >                                 /* [R,W]x div 0 -> 0 */
> > @@ -20525,10 +20558,53 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
> >                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
> >                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
> >                         };
> > +                       struct bpf_insn chk_and_sdiv[] = {
> > +                               /* [R,W]x sdiv 0 -> 0 */
> > +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> > +                                            BPF_JNE | BPF_K, insn->src_reg,
> > +                                            0, 2, 0),
> > +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
> > +                               BPF_JMP_IMM(BPF_JA, 0, 0, 4),
> > +                               /* LLONG_MIN sdiv -1 -> LLONG_MIN
> > +                                * INT_MIN sdiv -1 -> INT_MIN
> > +                                */
> > +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> > +                                            BPF_JNE | BPF_K, insn->src_reg,
> > +                                            0, 2, -1),
> > +                               /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
> > +                               BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
> > +                                            BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
> > +                                            0, 0, 0),
> > +                               BPF_JMP_IMM(BPF_JA, 0, 0, 1),
>
> I don't know how much it actually matters, but it feels like common
> safe case should be as straight-line-executed as possible, no?
>
> So maybe it's better to rearrange to roughly this (where rX is the
> divisor register):
>
>     if rX == 0 goto L1
>     if rX == -1 goto L2
>     rY /= rX
>     goto L3
> L1: /* zero case */
>     rY = 0 /* fallthrough, negation doesn't hurt, but less jumping */
> L2: /* negative one case (or zero) */
>     rY = -rY
> L3:
>     ... the rest of the program code ...
>
>
> Those two branches for common case are still annoyingly inefficient, I
> wonder if we should do
>
>     rX += 1 /* [-1, 0] -> [0, 1]
>     if rX <=(unsigned) 1 goto L1
>     rX -= 1 /* restore original divisor */
>     rY /= rX /* common case */
>     goto L3
> L1:
>     if rX == 0 goto L2 /* jump if originally -1 */
>     rY = 0 /* division by zero case */
> L2: /* fallthrough */
>     rY = -rY
>     rX -= 1 /* restore original divisor */
> L3:
>     ... continue with the rest ...

hmm.. just in case rX is the same register as rY, probably best to
restore rX early right at L1: label (and adjust `if rX == 0 goto L2`
into `if rX != 0 goto L2`).
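
I.e., the tail would become roughly this (sketch only, in the same
notation; worth double-checking the aliased rX == rY case):

L1:
    rX -= 1              /* restore divisor first, in case rX aliases rY */
    if rX != 0 goto L2   /* rX is -1 here iff it was originally -1 */
    rY = 0               /* division by zero case */
L2: /* fallthrough */
    rY = -rY
L3:
    ... continue with the rest ...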

>
>
> It's a bit trickier to follow, but should be faster in a common case.
>
> WDYT? Too much too far?
>
>
> > +                               *insn,
> > +                       };
> > +                       struct bpf_insn chk_and_smod[] = {
> > +                               /* [R,W]x mod 0 -> [R,W]x */
> > +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> > +                                            BPF_JNE | BPF_K, insn->src_reg,
> > +                                            0, 2, 0),
> > +                               BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
> > +                               BPF_JMP_IMM(BPF_JA, 0, 0, 4),
> > +                               /* [R,W]x mod -1 -> 0 */
> > +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> > +                                            BPF_JNE | BPF_K, insn->src_reg,
> > +                                            0, 2, -1),
> > +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
> > +                               BPF_JMP_IMM(BPF_JA, 0, 0, 1),
> > +                               *insn,
> > +                       };
> >
>
> Same idea here, keep the common case as straight as possible.
>
> > -                       patchlet = isdiv ? chk_and_div : chk_and_mod;
> > -                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
> > -                                     ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
> > +                       if (is_sdiv) {
> > +                               patchlet = chk_and_sdiv;
> > +                               cnt = ARRAY_SIZE(chk_and_sdiv);
> > +                       } else if (is_smod) {
> > +                               patchlet = chk_and_smod;
> > +                               cnt = ARRAY_SIZE(chk_and_smod);
> > +                       } else {
> > +                               patchlet = isdiv ? chk_and_div : chk_and_mod;
> > +                               cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
> > +                                             ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
> > +                       }
> >
> >                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
> >                         if (!new_prog)
> > --
> > 2.43.5
> >
Yonghong Song Sept. 12, 2024, 10:53 p.m. UTC | #3
On 9/12/24 11:17 AM, Andrii Nakryiko wrote:
> On Wed, Sep 11, 2024 at 9:00 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>> Zac Ecob reported a problem where a bpf program may cause kernel crash due
>> to the following error:
>>    Oops: divide error: 0000 [#1] PREEMPT SMP KASAN PTI
>>
>> The failure is due to the below signed divide:
>>    LLONG_MIN/-1 where LLONG_MIN equals to -9,223,372,036,854,775,808.
>> LLONG_MIN/-1 is supposed to give a positive number 9,223,372,036,854,775,808,
>> but it is impossible since for 64-bit system, the maximum positive
>> number is 9,223,372,036,854,775,807. On x86_64, LLONG_MIN/-1 will
>> cause a kernel exception. On arm64, the result for LLONG_MIN/-1 is
>> LLONG_MIN.
>>
>> Further investigation found all the following sdiv/smod cases may trigger
>> an exception when bpf program is running on x86_64 platform:
>>    - LLONG_MIN/-1 for 64bit operation
>>    - INT_MIN/-1 for 32bit operation
>>    - LLONG_MIN%-1 for 64bit operation
>>    - INT_MIN%-1 for 32bit operation
>> where -1 can be an immediate or in a register.
>>
>> On arm64, there are no exceptions:
>>    - LLONG_MIN/-1 = LLONG_MIN
>>    - INT_MIN/-1 = INT_MIN
>>    - LLONG_MIN%-1 = 0
>>    - INT_MIN%-1 = 0
>> where -1 can be an immediate or in a register.
>>
>> Insn patching is needed to handle the above cases and the patched codes
>> produced results aligned with above arm64 result.
>>
>>    [1] https://lore.kernel.org/bpf/tPJLTEh7S_DxFEqAI2Ji5MBSoZVg7_G-Py2iaZpAaWtM961fFTWtsnlzwvTbzBzaUzwQAoNATXKUlt0LZOFgnDcIyKCswAnAGdUF3LBrhGQ=@protonmail.com/
>>
>> Reported-by: Zac Ecob <zacecob@protonmail.com>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   kernel/bpf/verifier.c | 84 ++++++++++++++++++++++++++++++++++++++++---
>>   1 file changed, 80 insertions(+), 4 deletions(-)
>>
>> Changelogs:
>>    v1 -> v2:
>>      - Handle more crash cases like 32bit operation and modules.
>>      - Add more tests to test new cases.
>>
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index f35b80c16cda..ad7f51302c70 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -20499,13 +20499,46 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>>                          /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
>>                          insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
>>
>> -               /* Make divide-by-zero exceptions impossible. */
>> +               /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
>> +               if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
>> +                    insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
>> +                    insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
>> +                    insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
>> +                   insn->off == 1 && insn->imm == -1) {
>> +                       bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
>> +                       bool isdiv = BPF_OP(insn->code) == BPF_DIV;
>> +                       struct bpf_insn *patchlet;
>> +                       struct bpf_insn chk_and_div[] = {
>> +                               BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
>> +                                            BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
>> +                                            0, 0, 0),
>> +                       };
>> +                       struct bpf_insn chk_and_mod[] = {
>> +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
>> +                       };
>> +
>> +                       patchlet = isdiv ? chk_and_div : chk_and_mod;
> nit: "chk_and_" part in the name is misleading, it's more like
> "safe_div" and "safe_mod". Oh, and it's "sdiv" and "smod" specific, so
> probably not a bad idea to have that in the name as well.

good idea. Will use chk_and_sdiv and chk_and_smod.

>
>> +                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) : ARRAY_SIZE(chk_and_mod);
>> +
>> +                       new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
>> +                       if (!new_prog)
>> +                               return -ENOMEM;
>> +
>> +                       delta    += cnt - 1;
>> +                       env->prog = prog = new_prog;
>> +                       insn      = new_prog->insnsi + i + delta;
>> +                       goto next_insn;
>> +               }
>> +
>> +               /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
>>                  if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
>>                      insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
>>                      insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
>>                      insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
>>                          bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
>>                          bool isdiv = BPF_OP(insn->code) == BPF_DIV;
>> +                       bool is_sdiv = isdiv && insn->off == 1;
>> +                       bool is_smod = !isdiv && insn->off == 1;
>>                          struct bpf_insn *patchlet;
>>                          struct bpf_insn chk_and_div[] = {
>>                                  /* [R,W]x div 0 -> 0 */
>> @@ -20525,10 +20558,53 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>>                                  BPF_JMP_IMM(BPF_JA, 0, 0, 1),
>>                                  BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
>>                          };
>> +                       struct bpf_insn chk_and_sdiv[] = {
>> +                               /* [R,W]x sdiv 0 -> 0 */
>> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
>> +                                            BPF_JNE | BPF_K, insn->src_reg,
>> +                                            0, 2, 0),
>> +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
>> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 4),
>> +                               /* LLONG_MIN sdiv -1 -> LLONG_MIN
>> +                                * INT_MIN sdiv -1 -> INT_MIN
>> +                                */
>> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
>> +                                            BPF_JNE | BPF_K, insn->src_reg,
>> +                                            0, 2, -1),
>> +                               /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
>> +                               BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
>> +                                            BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
>> +                                            0, 0, 0),
>> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 1),
> I don't know how much it actually matters, but it feels like common
> safe case should be as straight-line-executed as possible, no?
>
> So maybe it's better to rearrange to roughly this (where rX is the
> divisor register):
>
>      if rX == 0 goto L1
>      if rX == -1 goto L2
>      rY /= rX
>      goto L3
> L1: /* zero case */
>      rY = 0 /* fallthrough, negation doesn't hurt, but less jumping */
> L2: /* negative one case (or zero) */
>      rY = -rY
> L3:
>      ... the rest of the program code ...

My previous patched insns tried to clearly separate the rX == 0 and
rX == -1 cases. The patchlet has 7 insns: 2 cond jmps, 2 uncond jmps
and 3 alu operations. The above one removes one uncond jmp, which
is indeed better.

>
>
> Those two branches for common case are still annoyingly inefficient, I
> wonder if we should do
>
>      rX += 1 /* [-1, 0] -> [0, 1]
>      if rX <=(unsigned) 1 goto L1
>      rX -= 1 /* restore original divisor */
>      rY /= rX /* common case */
>      goto L3
> L1:
>      if rX == 0 goto L2 /* jump if originally -1 */
>      rY = 0 /* division by zero case */
> L2: /* fallthrough */
>      rY = -rY
>      rX -= 1 /* restore original divisor */
> L3:
>      ... continue with the rest ...
>
>
> It's a bit trickier to follow, but should be faster in a common case.
>
> WDYT? Too much too far?

This is even better. The above rX -= 1 can be removed if we use
BPF_REG_AX as the temporary register. For example,

     tmp = rX
     tmp += 1 /* [-1, 0] -> [0, 1] */
     if tmp <=(unsigned) 1 goto L1
     rY /= rX /* common case */
     goto L3
L1:
     if tmp == 0 goto L2 /* jump if originally -1 */
     rY = 0 /* division by zero case */
L2: /* fallthrough */
     rY = -rY
L3:
     ... continue with the rest ...

Maybe we can do even better

     tmp = rX
     tmp += 1 /* [-1, 0] -> [0, 1] */
     if tmp >(unsigned) 1 goto L2
     if tmp == 0 goto L1
     rY = 0
L1:
     rY = -rY;
     goto L3
L2:
     rY /= rX
L3:

Could this be even better by reducing one uncond jmp in the fast path?
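
For concreteness, a rough sketch of that variant spelled with the same
patchlet macros, reusing is64/insn from the surrounding fixup code (the
jump offsets are hand-computed for this sketch, not from any posted
version, so treat it as illustrative only):

  struct bpf_insn chk_and_sdiv[] = {
          /* tmp = rX; tmp += 1  (maps [-1, 0] to [0, 1]) */
          BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
                       BPF_OP(BPF_MOV) | BPF_X, BPF_REG_AX,
                       insn->src_reg, 0, 0),
          BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
                       BPF_OP(BPF_ADD) | BPF_K, BPF_REG_AX,
                       0, 0, 1),
          /* if tmp >(unsigned) 1 goto the original sdiv (common case) */
          BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
                       BPF_JGT | BPF_K, BPF_REG_AX,
                       0, 4, 1),
          /* if tmp == 0 (divisor was -1) skip the zeroing */
          BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
                       BPF_JEQ | BPF_K, BPF_REG_AX,
                       0, 1, 0),
          /* divisor was 0: rY = 0, then fall through to the
           * (harmless) negation so only one jump is needed
           */
          BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
          /* rY = -rY (LLONG_MIN/INT_MIN stay put, 0 stays 0) */
          BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
                       BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
                       0, 0, 0),
          /* skip the real sdiv in the slow path */
          BPF_JMP_IMM(BPF_JA, 0, 0, 1),
          *insn,
  };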

>
>
>> +                               *insn,
>> +                       };
>> +                       struct bpf_insn chk_and_smod[] = {
>> +                               /* [R,W]x mod 0 -> [R,W]x */
>> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
>> +                                            BPF_JNE | BPF_K, insn->src_reg,
>> +                                            0, 2, 0),
>> +                               BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
>> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 4),
>> +                               /* [R,W]x mod -1 -> 0 */
>> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
>> +                                            BPF_JNE | BPF_K, insn->src_reg,
>> +                                            0, 2, -1),
>> +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
>> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 1),
>> +                               *insn,
>> +                       };
>>
> Same idea here, keep the common case as straight as possible.

Sure. Will do.

>
>> -                       patchlet = isdiv ? chk_and_div : chk_and_mod;
>> -                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
>> -                                     ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
>> +                       if (is_sdiv) {
>> +                               patchlet = chk_and_sdiv;
>> +                               cnt = ARRAY_SIZE(chk_and_sdiv);
>> +                       } else if (is_smod) {
>> +                               patchlet = chk_and_smod;
>> +                               cnt = ARRAY_SIZE(chk_and_smod);
>> +                       } else {
>> +                               patchlet = isdiv ? chk_and_div : chk_and_mod;
>> +                               cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
>> +                                             ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
>> +                       }
>>
>>                          new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
>>                          if (!new_prog)
>> --
>> 2.43.5
>>
Andrii Nakryiko Sept. 13, 2024, 2 a.m. UTC | #4
On Thu, Sep 12, 2024 at 3:53 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>
>
> On 9/12/24 11:17 AM, Andrii Nakryiko wrote:
> > On Wed, Sep 11, 2024 at 9:00 PM Yonghong Song <yonghong.song@linux.dev> wrote:
> >> Zac Ecob reported a problem where a bpf program may cause kernel crash due
> >> to the following error:
> >>    Oops: divide error: 0000 [#1] PREEMPT SMP KASAN PTI
> >>
> >> The failure is due to the below signed divide:
> >>    LLONG_MIN/-1 where LLONG_MIN equals to -9,223,372,036,854,775,808.
> >> LLONG_MIN/-1 is supposed to give a positive number 9,223,372,036,854,775,808,
> >> but it is impossible since for 64-bit system, the maximum positive
> >> number is 9,223,372,036,854,775,807. On x86_64, LLONG_MIN/-1 will
> >> cause a kernel exception. On arm64, the result for LLONG_MIN/-1 is
> >> LLONG_MIN.
> >>
> >> Further investigation found all the following sdiv/smod cases may trigger
> >> an exception when bpf program is running on x86_64 platform:
> >>    - LLONG_MIN/-1 for 64bit operation
> >>    - INT_MIN/-1 for 32bit operation
> >>    - LLONG_MIN%-1 for 64bit operation
> >>    - INT_MIN%-1 for 32bit operation
> >> where -1 can be an immediate or in a register.
> >>
> >> On arm64, there are no exceptions:
> >>    - LLONG_MIN/-1 = LLONG_MIN
> >>    - INT_MIN/-1 = INT_MIN
> >>    - LLONG_MIN%-1 = 0
> >>    - INT_MIN%-1 = 0
> >> where -1 can be an immediate or in a register.
> >>
> >> Insn patching is needed to handle the above cases and the patched codes
> >> produced results aligned with above arm64 result.
> >>
> >>    [1] https://lore.kernel.org/bpf/tPJLTEh7S_DxFEqAI2Ji5MBSoZVg7_G-Py2iaZpAaWtM961fFTWtsnlzwvTbzBzaUzwQAoNATXKUlt0LZOFgnDcIyKCswAnAGdUF3LBrhGQ=@protonmail.com/
> >>
> >> Reported-by: Zac Ecob <zacecob@protonmail.com>
> >> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> >> ---
> >>   kernel/bpf/verifier.c | 84 ++++++++++++++++++++++++++++++++++++++++---
> >>   1 file changed, 80 insertions(+), 4 deletions(-)
> >>
> >> Changelogs:
> >>    v1 -> v2:
> >>      - Handle more crash cases like 32bit operation and modules.
> >>      - Add more tests to test new cases.
> >>
> >> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> >> index f35b80c16cda..ad7f51302c70 100644
> >> --- a/kernel/bpf/verifier.c
> >> +++ b/kernel/bpf/verifier.c
> >> @@ -20499,13 +20499,46 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
> >>                          /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
> >>                          insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
> >>
> >> -               /* Make divide-by-zero exceptions impossible. */
> >> +               /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
> >> +               if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
> >> +                    insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
> >> +                    insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
> >> +                    insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
> >> +                   insn->off == 1 && insn->imm == -1) {
> >> +                       bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
> >> +                       bool isdiv = BPF_OP(insn->code) == BPF_DIV;
> >> +                       struct bpf_insn *patchlet;
> >> +                       struct bpf_insn chk_and_div[] = {
> >> +                               BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
> >> +                                            BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
> >> +                                            0, 0, 0),
> >> +                       };
> >> +                       struct bpf_insn chk_and_mod[] = {
> >> +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
> >> +                       };
> >> +
> >> +                       patchlet = isdiv ? chk_and_div : chk_and_mod;
> > nit: "chk_and_" part in the name is misleading, it's more like
> > "safe_div" and "safe_mod". Oh, and it's "sdiv" and "smod" specific, so
> > probably not a bad idea to have that in the name as well.
>
> good idea. Will use chk_and_sdiv and chk_and_smod.
>
> >
> >> +                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) : ARRAY_SIZE(chk_and_mod);
> >> +
> >> +                       new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
> >> +                       if (!new_prog)
> >> +                               return -ENOMEM;
> >> +
> >> +                       delta    += cnt - 1;
> >> +                       env->prog = prog = new_prog;
> >> +                       insn      = new_prog->insnsi + i + delta;
> >> +                       goto next_insn;
> >> +               }
> >> +
> >> +               /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
> >>                  if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
> >>                      insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
> >>                      insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
> >>                      insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
> >>                          bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
> >>                          bool isdiv = BPF_OP(insn->code) == BPF_DIV;
> >> +                       bool is_sdiv = isdiv && insn->off == 1;
> >> +                       bool is_smod = !isdiv && insn->off == 1;
> >>                          struct bpf_insn *patchlet;
> >>                          struct bpf_insn chk_and_div[] = {
> >>                                  /* [R,W]x div 0 -> 0 */
> >> @@ -20525,10 +20558,53 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
> >>                                  BPF_JMP_IMM(BPF_JA, 0, 0, 1),
> >>                                  BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
> >>                          };
> >> +                       struct bpf_insn chk_and_sdiv[] = {
> >> +                               /* [R,W]x sdiv 0 -> 0 */
> >> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> >> +                                            BPF_JNE | BPF_K, insn->src_reg,
> >> +                                            0, 2, 0),
> >> +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
> >> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 4),
> >> +                               /* LLONG_MIN sdiv -1 -> LLONG_MIN
> >> +                                * INT_MIN sdiv -1 -> INT_MIN
> >> +                                */
> >> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> >> +                                            BPF_JNE | BPF_K, insn->src_reg,
> >> +                                            0, 2, -1),
> >> +                               /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
> >> +                               BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
> >> +                                            BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
> >> +                                            0, 0, 0),
> >> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 1),
> > I don't know how much it actually matters, but it feels like common
> > safe case should be as straight-line-executed as possible, no?
> >
> > So maybe it's better to rearrange to roughly this (where rX is the
> > divisor register):
> >
> >      if rX == 0 goto L1
> >      if rX == -1 goto L2
> >      rY /= rX
> >      goto L3
> > L1: /* zero case */
> >      rY = 0 /* fallthrough, negation doesn't hurt, but less jumping */
> > L2: /* negative one case (or zero) */
> >      rY = -rY
> > L3:
> >      ... the rest of the program code ...
>
> My previous patched insn try to clearly separate rX == 0 and
> rX == -1 case. It has 2 insns including 2 cond jmps, 2 uncond jmps and
> one 3 alu operations. The above one removed one uncond jmp, which
> is indeed better.
>
> >
> >
> > Those two branches for common case are still annoyingly inefficient, I
> > wonder if we should do
> >
> >      rX += 1 /* [-1, 0] -> [0, 1]
> >      if rX <=(unsigned) 1 goto L1
> >      rX -= 1 /* restore original divisor */
> >      rY /= rX /* common case */
> >      goto L3
> > L1:
> >      if rX == 0 goto L2 /* jump if originally -1 */
> >      rY = 0 /* division by zero case */
> > L2: /* fallthrough */
> >      rY = -rY
> >      rX -= 1 /* restore original divisor */
> > L3:
> >      ... continue with the rest ...
> >
> >
> > It's a bit trickier to follow, but should be faster in a common case.
> >
> > WDYT? Too much too far?
>
> This is even better. The above rX -= 1 can be removed if we use
> BPF_REG_AX as the temporary register. For example,
>
>      tmp = rX
>      tmp += 1 /* [-1, 0] -> [0, 1]
>      if tmp <=(unsigned) 1 goto L1
>      rY /= rX /* common case */
>      goto L3
> L1:
>      if tmp == 0 goto L2 /* jump if originally -1 */
>      rY = 0 /* division by zero case */
> L2: /* fallthrough */
>      rY = -rY
> L3:
>      ... continue with the rest ...
>
> Maybe we can do even better
>
>      tmp = rX
>      tmp += 1 /* [-1, 0] -> [0, 1]
>      if tmp >(unsigned) 1 goto L2
>      if tmp == 0 goto L1
>      rY = 0
> L1:
>      rY = -rY;
>      goto L3
> L2:
>      rY /= rX
> L3:
>
> Could this be even better by reducing one uncond jmp in the fast path?

Yep, makes sense to me. Go for it (as far as I'm concerned).

>
> >
> >
> >> +                               *insn,
> >> +                       };
> >> +                       struct bpf_insn chk_and_smod[] = {
> >> +                               /* [R,W]x mod 0 -> [R,W]x */
> >> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> >> +                                            BPF_JNE | BPF_K, insn->src_reg,
> >> +                                            0, 2, 0),
> >> +                               BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
> >> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 4),
> >> +                               /* [R,W]x mod -1 -> 0 */
> >> +                               BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
> >> +                                            BPF_JNE | BPF_K, insn->src_reg,
> >> +                                            0, 2, -1),
> >> +                               BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
> >> +                               BPF_JMP_IMM(BPF_JA, 0, 0, 1),
> >> +                               *insn,
> >> +                       };
> >>
> > Same idea here, keep the common case as straight as possible.
>
> Sure. Will do.
>
> >
> >> -                       patchlet = isdiv ? chk_and_div : chk_and_mod;
> >> -                       cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
> >> -                                     ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
> >> +                       if (is_sdiv) {
> >> +                               patchlet = chk_and_sdiv;
> >> +                               cnt = ARRAY_SIZE(chk_and_sdiv);
> >> +                       } else if (is_smod) {
> >> +                               patchlet = chk_and_smod;
> >> +                               cnt = ARRAY_SIZE(chk_and_smod);
> >> +                       } else {
> >> +                               patchlet = isdiv ? chk_and_div : chk_and_mod;
> >> +                               cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
> >> +                                             ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
> >> +                       }
> >>
> >>                          new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
> >>                          if (!new_prog)
> >> --
> >> 2.43.5
> >>

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f35b80c16cda..ad7f51302c70 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20499,13 +20499,46 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 			/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
 			insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
 
-		/* Make divide-by-zero exceptions impossible. */
+		/* Make sdiv/smod divide-by-minus-one exceptions impossible. */
+		if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
+		     insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
+		     insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
+		     insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
+		    insn->off == 1 && insn->imm == -1) {
+			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+			struct bpf_insn *patchlet;
+			struct bpf_insn chk_and_div[] = {
+				BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+					     BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
+					     0, 0, 0),
+			};
+			struct bpf_insn chk_and_mod[] = {
+				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
+			};
+
+			patchlet = isdiv ? chk_and_div : chk_and_mod;
+			cnt = isdiv ? ARRAY_SIZE(chk_and_div) : ARRAY_SIZE(chk_and_mod);
+
+			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
+		/* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+			bool is_sdiv = isdiv && insn->off == 1;
+			bool is_smod = !isdiv && insn->off == 1;
 			struct bpf_insn *patchlet;
 			struct bpf_insn chk_and_div[] = {
 				/* [R,W]x div 0 -> 0 */
@@ -20525,10 +20558,53 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
 				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
 			};
+			struct bpf_insn chk_and_sdiv[] = {
+				/* [R,W]x sdiv 0 -> 0 */
+				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+					     BPF_JNE | BPF_K, insn->src_reg,
+					     0, 2, 0),
+				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
+				BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+				/* LLONG_MIN sdiv -1 -> LLONG_MIN
+				 * INT_MIN sdiv -1 -> INT_MIN
+				 */
+				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+					     BPF_JNE | BPF_K, insn->src_reg,
+					     0, 2, -1),
+				/* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
+				BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+					     BPF_OP(BPF_NEG) | BPF_K, insn->dst_reg,
+					     0, 0, 0),
+				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+				*insn,
+			};
+			struct bpf_insn chk_and_smod[] = {
+				/* [R,W]x mod 0 -> [R,W]x */
+				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+					     BPF_JNE | BPF_K, insn->src_reg,
+					     0, 2, 0),
+				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
+				BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+				/* [R,W]x mod -1 -> 0 */
+				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+					     BPF_JNE | BPF_K, insn->src_reg,
+					     0, 2, -1),
+				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
+				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+				*insn,
+			};
 
-			patchlet = isdiv ? chk_and_div : chk_and_mod;
-			cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
-				      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
+			if (is_sdiv) {
+				patchlet = chk_and_sdiv;
+				cnt = ARRAY_SIZE(chk_and_sdiv);
+			} else if (is_smod) {
+				patchlet = chk_and_smod;
+				cnt = ARRAY_SIZE(chk_and_smod);
+			} else {
+				patchlet = isdiv ? chk_and_div : chk_and_mod;
+				cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
+					      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
+			}
 
 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
 			if (!new_prog)