diff mbox series

[bpf-next,v4,1/2] bpf: Get better reg range with ldsx and 32bit compare

Message ID 20240718052821.3753486-1-yonghong.song@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series [bpf-next,v4,1/2] bpf: Get better reg range with ldsx and 32bit compare | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 661 this patch: 661
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 7 maintainers not CCed: kpsingh@kernel.org martin.lau@linux.dev haoluo@google.com jolsa@kernel.org song@kernel.org sdf@fomichev.me john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 662 this patch: 662
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 672 this patch: 672
netdev/checkpatch warning CHECK: multiple assignments should be avoided WARNING: line length of 81 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc

Commit Message

Yonghong Song July 18, 2024, 5:28 a.m. UTC
With latest llvm19, the selftest iters/iter_arr_with_actual_elem_count
failed with -mcpu=v4.

The following are the details:
  0: R1=ctx() R10=fp0
  ; int iter_arr_with_actual_elem_count(const void *ctx) @ iters.c:1420
  0: (b4) w7 = 0                        ; R7_w=0
  ; int i, n = loop_data.n, sum = 0; @ iters.c:1422
  1: (18) r1 = 0xffffc90000191478       ; R1_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144)
  3: (61) r6 = *(u32 *)(r1 +128)        ; R1_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144) R6_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
  ; if (n > ARRAY_SIZE(loop_data.data)) @ iters.c:1424
  4: (26) if w6 > 0x20 goto pc+27       ; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f))
  5: (bf) r8 = r10                      ; R8_w=fp0 R10=fp0
  6: (07) r8 += -8                      ; R8_w=fp-8
  ; bpf_for(i, 0, n) { @ iters.c:1427
  7: (bf) r1 = r8                       ; R1_w=fp-8 R8_w=fp-8
  8: (b4) w2 = 0                        ; R2_w=0
  9: (bc) w3 = w6                       ; R3_w=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R6_w=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f))
  10: (85) call bpf_iter_num_new#45179          ; R0=scalar() fp-8=iter_num(ref_id=2,state=active,depth=0) refs=2
  11: (bf) r1 = r8                      ; R1=fp-8 R8=fp-8 refs=2
  12: (85) call bpf_iter_num_next#45181 13: R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R6=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=0 R8=fp-8 R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=1) refs=2
  ; bpf_for(i, 0, n) { @ iters.c:1427
  13: (15) if r0 == 0x0 goto pc+2       ; R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) refs=2
  14: (81) r1 = *(s32 *)(r0 +0)         ; R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R1_w=scalar(smin=0xffffffff80000000,smax=0x7fffffff) refs=2
  15: (ae) if w1 < w6 goto pc+4 20: R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R1=scalar(smin=0xffffffff80000000,smax=smax32=umax32=31,umax=0xffffffff0000001f,smin32=0,var_off=(0x0; 0xffffffff0000001f)) R6=scalar(id=1,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=0 R8=fp-8 R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=1) refs=2
  ; sum += loop_data.data[i]; @ iters.c:1429
  20: (67) r1 <<= 2                     ; R1_w=scalar(smax=0x7ffffffc0000007c,umax=0xfffffffc0000007c,smin32=0,smax32=umax32=124,var_off=(0x0; 0xfffffffc0000007c)) refs=2
  21: (18) r2 = 0xffffc90000191478      ; R2_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144) refs=2
  23: (0f) r2 += r1
  math between map_value pointer and register with unbounded min value is not allowed

The source code:
  int iter_arr_with_actual_elem_count(const void *ctx)
  {
        int i, n = loop_data.n, sum = 0;

        if (n > ARRAY_SIZE(loop_data.data))
                return 0;

        bpf_for(i, 0, n) {
                /* no rechecking of i against ARRAY_SIZE(loop_data.n) */
                sum += loop_data.data[i];
        }

        return sum;
  }

The insn #14 is a sign-extenstion load which is related to 'int i'.
The insn #15 did a subreg comparision. Note that smin=0xffffffff80000000 and this caused later
insn #23 failed verification due to unbounded min value.

Actually insn #15 R1 smin range can be better. Before insn #15, we have
  R1_w=scalar(smin=0xffffffff80000000,smax=0x7fffffff)
With the above range, we know for R1, upper 32bit can only be 0xffffffff or 0.
Otherwise, the value range for R1 could be beyond [smin=0xffffffff80000000,smax=0x7fffffff].

After insn #15, for the true patch, we know smin32=0 and smax32=32. With the upper 32bit 0xffffffff,
then the corresponding value is [0xffffffff00000000, 0xffffffff00000020]. The range is
obviously beyond the original range [smin=0xffffffff80000000,smax=0x7fffffff] and the
range is not possible. So the upper 32bit must be 0, which implies smin = smin32 and
smax = smax32.

This patch fixed the issue by adding additional register deduction after 32-bit compare
insn. If the signed 32-bit register range is non-negative then 64-bit smin is
in range of [S32_MIN, S32_MAX], then the actual 64-bit smin/smax should be the same
as 32-bit smin32/smax32.

With this patch, iters/iter_arr_with_actual_elem_count succeeded with better register range:

from 15 to 20: R0=rdonly_mem(id=7,ref_obj_id=2,sz=4) R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=31,var_off=(0x0; 0x1f)) R6=scalar(id=1,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=scalar(id=9,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R8=scalar(id=9,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=3) refs=2

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

Comments

Andrii Nakryiko July 19, 2024, 10:46 p.m. UTC | #1
On Wed, Jul 17, 2024 at 10:28 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>
> With latest llvm19, the selftest iters/iter_arr_with_actual_elem_count
> failed with -mcpu=v4.
>
> The following are the details:
>   0: R1=ctx() R10=fp0
>   ; int iter_arr_with_actual_elem_count(const void *ctx) @ iters.c:1420
>   0: (b4) w7 = 0                        ; R7_w=0
>   ; int i, n = loop_data.n, sum = 0; @ iters.c:1422
>   1: (18) r1 = 0xffffc90000191478       ; R1_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144)
>   3: (61) r6 = *(u32 *)(r1 +128)        ; R1_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144) R6_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
>   ; if (n > ARRAY_SIZE(loop_data.data)) @ iters.c:1424
>   4: (26) if w6 > 0x20 goto pc+27       ; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f))
>   5: (bf) r8 = r10                      ; R8_w=fp0 R10=fp0
>   6: (07) r8 += -8                      ; R8_w=fp-8
>   ; bpf_for(i, 0, n) { @ iters.c:1427
>   7: (bf) r1 = r8                       ; R1_w=fp-8 R8_w=fp-8
>   8: (b4) w2 = 0                        ; R2_w=0
>   9: (bc) w3 = w6                       ; R3_w=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R6_w=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f))
>   10: (85) call bpf_iter_num_new#45179          ; R0=scalar() fp-8=iter_num(ref_id=2,state=active,depth=0) refs=2
>   11: (bf) r1 = r8                      ; R1=fp-8 R8=fp-8 refs=2
>   12: (85) call bpf_iter_num_next#45181 13: R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R6=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=0 R8=fp-8 R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=1) refs=2
>   ; bpf_for(i, 0, n) { @ iters.c:1427
>   13: (15) if r0 == 0x0 goto pc+2       ; R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) refs=2
>   14: (81) r1 = *(s32 *)(r0 +0)         ; R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R1_w=scalar(smin=0xffffffff80000000,smax=0x7fffffff) refs=2
>   15: (ae) if w1 < w6 goto pc+4 20: R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R1=scalar(smin=0xffffffff80000000,smax=smax32=umax32=31,umax=0xffffffff0000001f,smin32=0,var_off=(0x0; 0xffffffff0000001f)) R6=scalar(id=1,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=0 R8=fp-8 R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=1) refs=2
>   ; sum += loop_data.data[i]; @ iters.c:1429
>   20: (67) r1 <<= 2                     ; R1_w=scalar(smax=0x7ffffffc0000007c,umax=0xfffffffc0000007c,smin32=0,smax32=umax32=124,var_off=(0x0; 0xfffffffc0000007c)) refs=2
>   21: (18) r2 = 0xffffc90000191478      ; R2_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144) refs=2
>   23: (0f) r2 += r1
>   math between map_value pointer and register with unbounded min value is not allowed
>
> The source code:
>   int iter_arr_with_actual_elem_count(const void *ctx)
>   {
>         int i, n = loop_data.n, sum = 0;
>
>         if (n > ARRAY_SIZE(loop_data.data))
>                 return 0;
>
>         bpf_for(i, 0, n) {
>                 /* no rechecking of i against ARRAY_SIZE(loop_data.n) */
>                 sum += loop_data.data[i];
>         }
>
>         return sum;
>   }
>
> The insn #14 is a sign-extenstion load which is related to 'int i'.
> The insn #15 did a subreg comparision. Note that smin=0xffffffff80000000 and this caused later
> insn #23 failed verification due to unbounded min value.
>
> Actually insn #15 R1 smin range can be better. Before insn #15, we have
>   R1_w=scalar(smin=0xffffffff80000000,smax=0x7fffffff)
> With the above range, we know for R1, upper 32bit can only be 0xffffffff or 0.
> Otherwise, the value range for R1 could be beyond [smin=0xffffffff80000000,smax=0x7fffffff].
>
> After insn #15, for the true patch, we know smin32=0 and smax32=32. With the upper 32bit 0xffffffff,
> then the corresponding value is [0xffffffff00000000, 0xffffffff00000020]. The range is
> obviously beyond the original range [smin=0xffffffff80000000,smax=0x7fffffff] and the
> range is not possible. So the upper 32bit must be 0, which implies smin = smin32 and
> smax = smax32.
>
> This patch fixed the issue by adding additional register deduction after 32-bit compare

__reg_deduce_mixed_bounds() is called from reg_bounds_sync() pretty
much after every arithmetic operation or any comparison. Is the above
logic true universally or only after signed comparison? If the latter,
then we can't just do it unconditionally inside
__reg_deduce_mixed_bounds().

> insn. If the signed 32-bit register range is non-negative then 64-bit smin is
> in range of [S32_MIN, S32_MAX], then the actual 64-bit smin/smax should be the same
> as 32-bit smin32/smax32.
>
> With this patch, iters/iter_arr_with_actual_elem_count succeeded with better register range:
>
> from 15 to 20: R0=rdonly_mem(id=7,ref_obj_id=2,sz=4) R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=31,var_off=(0x0; 0x1f)) R6=scalar(id=1,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=scalar(id=9,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R8=scalar(id=9,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=3) refs=2
>
> Acked-by: Eduard Zingerman <eddyz87@gmail.com>
> Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> ---
>  kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 36 insertions(+)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 8da132a1ef28..46532437c4bb 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -2182,6 +2182,42 @@ static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
>                 reg->smin_value = max_t(s64, reg->smin_value, new_smin);
>                 reg->smax_value = min_t(s64, reg->smax_value, new_smax);
>         }
> +
> +       /* Here we would like to handle a special case after sign extending load,
> +        * when upper bits for a 64-bit range are all 1s or all 0s.
> +        *
> +        * Upper bits are all 1s when register is in a range:
> +        *   [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
> +        * Upper bits are all 0s when register is in a range:
> +        *   [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
> +        * Together this forms are continuous range:
> +        *   [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
> +        *
> +        * Now, suppose that register range is in fact tighter:
> +        *   [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
> +        * Also suppose that it's 32-bit range is positive,
> +        * meaning that lower 32-bits of the full 64-bit register
> +        * are in the range:
> +        *   [0x0000_0000, 0x7fff_ffff] (W)
> +        *
> +        * If this happens, then any value in a range:
> +        *   [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
> +        * is smaller than a lowest bound of the range (R):
> +        *   0xffff_ffff_8000_0000
> +        * which means that upper bits of the full 64-bit register
> +        * can't be all 1s, when lower bits are in range (W).
> +        *
> +        * Note that:
> +        *  - 0xffff_ffff_8000_0000 == (s64)S32_MIN
> +        *  - 0x0000_0000_ffff_ffff == (s64)S32_MAX

?? S32_MAX = 0x7fffffff, so should the right part be U32_MAX or the
left part should be 0x0000_0000_7fff_ffff ?

> +        * These relations are used in the conditions below.
> +        */
> +       if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
> +               reg->smin_value = reg->umin_value = reg->s32_min_value;
> +               reg->smax_value = reg->umax_value = reg->s32_max_value;

let's please not mix signed and unsigned 32 -> 64 bit conversions,
they are confusing and tricky enough in each domain individually,
there is no point in mixing them

> +               reg->var_off = tnum_intersect(reg->var_off,
> +                                             tnum_range(reg->smin_value, reg->smax_value));
> +       }
>  }
>
>  static void __reg_deduce_bounds(struct bpf_reg_state *reg)
> --
> 2.43.0
>
Eduard Zingerman July 19, 2024, 11:40 p.m. UTC | #2
On Fri, 2024-07-19 at 15:46 -0700, Andrii Nakryiko wrote:

[...]

> > +       /* Here we would like to handle a special case after sign extending load,
> > +        * when upper bits for a 64-bit range are all 1s or all 0s.
> > +        *
> > +        * Upper bits are all 1s when register is in a range:
> > +        *   [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
> > +        * Upper bits are all 0s when register is in a range:
> > +        *   [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
> > +        * Together this forms are continuous range:
> > +        *   [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
> > +        *
> > +        * Now, suppose that register range is in fact tighter:
> > +        *   [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
> > +        * Also suppose that it's 32-bit range is positive,
> > +        * meaning that lower 32-bits of the full 64-bit register
> > +        * are in the range:
> > +        *   [0x0000_0000, 0x7fff_ffff] (W)
> > +        *
> > +        * If this happens, then any value in a range:
> > +        *   [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
> > +        * is smaller than a lowest bound of the range (R):
> > +        *   0xffff_ffff_8000_0000
> > +        * which means that upper bits of the full 64-bit register
> > +        * can't be all 1s, when lower bits are in range (W).
> > +        *
> > +        * Note that:
> > +        *  - 0xffff_ffff_8000_0000 == (s64)S32_MIN
> > +        *  - 0x0000_0000_ffff_ffff == (s64)S32_MAX
> 
> ?? S32_MAX = 0x7fffffff, so should the right part be U32_MAX or the
> left part should be 0x0000_0000_7fff_ffff ?

Oops, that's on me, yes it should be 0x0000_0000_7fff_ffff here.

[...]
Yonghong Song July 22, 2024, 6:16 p.m. UTC | #3
On 7/19/24 3:46 PM, Andrii Nakryiko wrote:
> On Wed, Jul 17, 2024 at 10:28 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>> With latest llvm19, the selftest iters/iter_arr_with_actual_elem_count
>> failed with -mcpu=v4.
>>
>> The following are the details:
>>    0: R1=ctx() R10=fp0
>>    ; int iter_arr_with_actual_elem_count(const void *ctx) @ iters.c:1420
>>    0: (b4) w7 = 0                        ; R7_w=0
>>    ; int i, n = loop_data.n, sum = 0; @ iters.c:1422
>>    1: (18) r1 = 0xffffc90000191478       ; R1_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144)
>>    3: (61) r6 = *(u32 *)(r1 +128)        ; R1_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144) R6_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
>>    ; if (n > ARRAY_SIZE(loop_data.data)) @ iters.c:1424
>>    4: (26) if w6 > 0x20 goto pc+27       ; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f))
>>    5: (bf) r8 = r10                      ; R8_w=fp0 R10=fp0
>>    6: (07) r8 += -8                      ; R8_w=fp-8
>>    ; bpf_for(i, 0, n) { @ iters.c:1427
>>    7: (bf) r1 = r8                       ; R1_w=fp-8 R8_w=fp-8
>>    8: (b4) w2 = 0                        ; R2_w=0
>>    9: (bc) w3 = w6                       ; R3_w=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R6_w=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f))
>>    10: (85) call bpf_iter_num_new#45179          ; R0=scalar() fp-8=iter_num(ref_id=2,state=active,depth=0) refs=2
>>    11: (bf) r1 = r8                      ; R1=fp-8 R8=fp-8 refs=2
>>    12: (85) call bpf_iter_num_next#45181 13: R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R6=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=0 R8=fp-8 R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=1) refs=2
>>    ; bpf_for(i, 0, n) { @ iters.c:1427
>>    13: (15) if r0 == 0x0 goto pc+2       ; R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) refs=2
>>    14: (81) r1 = *(s32 *)(r0 +0)         ; R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R1_w=scalar(smin=0xffffffff80000000,smax=0x7fffffff) refs=2
>>    15: (ae) if w1 < w6 goto pc+4 20: R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R1=scalar(smin=0xffffffff80000000,smax=smax32=umax32=31,umax=0xffffffff0000001f,smin32=0,var_off=(0x0; 0xffffffff0000001f)) R6=scalar(id=1,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=0 R8=fp-8 R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=1) refs=2
>>    ; sum += loop_data.data[i]; @ iters.c:1429
>>    20: (67) r1 <<= 2                     ; R1_w=scalar(smax=0x7ffffffc0000007c,umax=0xfffffffc0000007c,smin32=0,smax32=umax32=124,var_off=(0x0; 0xfffffffc0000007c)) refs=2
>>    21: (18) r2 = 0xffffc90000191478      ; R2_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144) refs=2
>>    23: (0f) r2 += r1
>>    math between map_value pointer and register with unbounded min value is not allowed
>>
>> The source code:
>>    int iter_arr_with_actual_elem_count(const void *ctx)
>>    {
>>          int i, n = loop_data.n, sum = 0;
>>
>>          if (n > ARRAY_SIZE(loop_data.data))
>>                  return 0;
>>
>>          bpf_for(i, 0, n) {
>>                  /* no rechecking of i against ARRAY_SIZE(loop_data.n) */
>>                  sum += loop_data.data[i];
>>          }
>>
>>          return sum;
>>    }
>>
>> The insn #14 is a sign-extenstion load which is related to 'int i'.
>> The insn #15 did a subreg comparision. Note that smin=0xffffffff80000000 and this caused later
>> insn #23 failed verification due to unbounded min value.
>>
>> Actually insn #15 R1 smin range can be better. Before insn #15, we have
>>    R1_w=scalar(smin=0xffffffff80000000,smax=0x7fffffff)
>> With the above range, we know for R1, upper 32bit can only be 0xffffffff or 0.
>> Otherwise, the value range for R1 could be beyond [smin=0xffffffff80000000,smax=0x7fffffff].
>>
>> After insn #15, for the true patch, we know smin32=0 and smax32=32. With the upper 32bit 0xffffffff,
>> then the corresponding value is [0xffffffff00000000, 0xffffffff00000020]. The range is
>> obviously beyond the original range [smin=0xffffffff80000000,smax=0x7fffffff] and the
>> range is not possible. So the upper 32bit must be 0, which implies smin = smin32 and
>> smax = smax32.
>>
>> This patch fixed the issue by adding additional register deduction after 32-bit compare
> __reg_deduce_mixed_bounds() is called from reg_bounds_sync() pretty
> much after every arithmetic operation or any comparison. Is the above
> logic true universally or only after signed comparison? If the latter,
> then we can't just do it unconditionally inside
> __reg_deduce_mixed_bounds().

It is not just for signed extension. Some other arithmetic operation may
produce such a range as well.

>
>> insn. If the signed 32-bit register range is non-negative then 64-bit smin is
>> in range of [S32_MIN, S32_MAX], then the actual 64-bit smin/smax should be the same
>> as 32-bit smin32/smax32.
>>
>> With this patch, iters/iter_arr_with_actual_elem_count succeeded with better register range:
>>
>> from 15 to 20: R0=rdonly_mem(id=7,ref_obj_id=2,sz=4) R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=31,var_off=(0x0; 0x1f)) R6=scalar(id=1,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=scalar(id=9,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R8=scalar(id=9,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=3) refs=2
>>
>> Acked-by: Eduard Zingerman <eddyz87@gmail.com>
>> Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++++++
>>   1 file changed, 36 insertions(+)
>>
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 8da132a1ef28..46532437c4bb 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -2182,6 +2182,42 @@ static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
>>                  reg->smin_value = max_t(s64, reg->smin_value, new_smin);
>>                  reg->smax_value = min_t(s64, reg->smax_value, new_smax);
>>          }
>> +
>> +       /* Here we would like to handle a special case after sign extending load,
>> +        * when upper bits for a 64-bit range are all 1s or all 0s.
>> +        *
>> +        * Upper bits are all 1s when register is in a range:
>> +        *   [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
>> +        * Upper bits are all 0s when register is in a range:
>> +        *   [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
>> +        * Together this forms are continuous range:
>> +        *   [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
>> +        *
>> +        * Now, suppose that register range is in fact tighter:
>> +        *   [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
>> +        * Also suppose that it's 32-bit range is positive,
>> +        * meaning that lower 32-bits of the full 64-bit register
>> +        * are in the range:
>> +        *   [0x0000_0000, 0x7fff_ffff] (W)
>> +        *
>> +        * If this happens, then any value in a range:
>> +        *   [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
>> +        * is smaller than a lowest bound of the range (R):
>> +        *   0xffff_ffff_8000_0000
>> +        * which means that upper bits of the full 64-bit register
>> +        * can't be all 1s, when lower bits are in range (W).
>> +        *
>> +        * Note that:
>> +        *  - 0xffff_ffff_8000_0000 == (s64)S32_MIN
>> +        *  - 0x0000_0000_ffff_ffff == (s64)S32_MAX
> ?? S32_MAX = 0x7fffffff, so should the right part be U32_MAX or the
> left part should be 0x0000_0000_7fff_ffff ?
Will make a change in the next revision.
>
>> +        * These relations are used in the conditions below.
>> +        */
>> +       if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
>> +               reg->smin_value = reg->umin_value = reg->s32_min_value;
>> +               reg->smax_value = reg->umax_value = reg->s32_max_value;
> let's please not mix signed and unsigned 32 -> 64 bit conversions,
> they are confusing and tricky enough in each domain individually,
> there is no point in mixing them
Okay, will do.
>
>> +               reg->var_off = tnum_intersect(reg->var_off,
>> +                                             tnum_range(reg->smin_value, reg->smax_value));
>> +       }
>>   }
>>
>>   static void __reg_deduce_bounds(struct bpf_reg_state *reg)
>> --
>> 2.43.0
>>
Andrii Nakryiko July 24, 2024, 10:56 p.m. UTC | #4
On Mon, Jul 22, 2024 at 11:16 AM Yonghong Song <yonghong.song@linux.dev> wrote:
>
>
> On 7/19/24 3:46 PM, Andrii Nakryiko wrote:
> > On Wed, Jul 17, 2024 at 10:28 PM Yonghong Song <yonghong.song@linux.dev> wrote:
> >> With latest llvm19, the selftest iters/iter_arr_with_actual_elem_count
> >> failed with -mcpu=v4.
> >>
> >> The following are the details:
> >>    0: R1=ctx() R10=fp0
> >>    ; int iter_arr_with_actual_elem_count(const void *ctx) @ iters.c:1420
> >>    0: (b4) w7 = 0                        ; R7_w=0
> >>    ; int i, n = loop_data.n, sum = 0; @ iters.c:1422
> >>    1: (18) r1 = 0xffffc90000191478       ; R1_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144)
> >>    3: (61) r6 = *(u32 *)(r1 +128)        ; R1_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144) R6_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
> >>    ; if (n > ARRAY_SIZE(loop_data.data)) @ iters.c:1424
> >>    4: (26) if w6 > 0x20 goto pc+27       ; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f))
> >>    5: (bf) r8 = r10                      ; R8_w=fp0 R10=fp0
> >>    6: (07) r8 += -8                      ; R8_w=fp-8
> >>    ; bpf_for(i, 0, n) { @ iters.c:1427
> >>    7: (bf) r1 = r8                       ; R1_w=fp-8 R8_w=fp-8
> >>    8: (b4) w2 = 0                        ; R2_w=0
> >>    9: (bc) w3 = w6                       ; R3_w=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R6_w=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f))
> >>    10: (85) call bpf_iter_num_new#45179          ; R0=scalar() fp-8=iter_num(ref_id=2,state=active,depth=0) refs=2
> >>    11: (bf) r1 = r8                      ; R1=fp-8 R8=fp-8 refs=2
> >>    12: (85) call bpf_iter_num_next#45181 13: R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R6=scalar(id=1,smin=smin32=0,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=0 R8=fp-8 R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=1) refs=2
> >>    ; bpf_for(i, 0, n) { @ iters.c:1427
> >>    13: (15) if r0 == 0x0 goto pc+2       ; R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) refs=2
> >>    14: (81) r1 = *(s32 *)(r0 +0)         ; R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R1_w=scalar(smin=0xffffffff80000000,smax=0x7fffffff) refs=2
> >>    15: (ae) if w1 < w6 goto pc+4 20: R0=rdonly_mem(id=3,ref_obj_id=2,sz=4) R1=scalar(smin=0xffffffff80000000,smax=smax32=umax32=31,umax=0xffffffff0000001f,smin32=0,var_off=(0x0; 0xffffffff0000001f)) R6=scalar(id=1,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=0 R8=fp-8 R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=1) refs=2
> >>    ; sum += loop_data.data[i]; @ iters.c:1429
> >>    20: (67) r1 <<= 2                     ; R1_w=scalar(smax=0x7ffffffc0000007c,umax=0xfffffffc0000007c,smin32=0,smax32=umax32=124,var_off=(0x0; 0xfffffffc0000007c)) refs=2
> >>    21: (18) r2 = 0xffffc90000191478      ; R2_w=map_value(map=iters.bss,ks=4,vs=1280,off=1144) refs=2
> >>    23: (0f) r2 += r1
> >>    math between map_value pointer and register with unbounded min value is not allowed
> >>
> >> The source code:
> >>    int iter_arr_with_actual_elem_count(const void *ctx)
> >>    {
> >>          int i, n = loop_data.n, sum = 0;
> >>
> >>          if (n > ARRAY_SIZE(loop_data.data))
> >>                  return 0;
> >>
> >>          bpf_for(i, 0, n) {
> >>                  /* no rechecking of i against ARRAY_SIZE(loop_data.n) */
> >>                  sum += loop_data.data[i];
> >>          }
> >>
> >>          return sum;
> >>    }
> >>
> >> The insn #14 is a sign-extenstion load which is related to 'int i'.
> >> The insn #15 did a subreg comparision. Note that smin=0xffffffff80000000 and this caused later
> >> insn #23 failed verification due to unbounded min value.
> >>
> >> Actually insn #15 R1 smin range can be better. Before insn #15, we have
> >>    R1_w=scalar(smin=0xffffffff80000000,smax=0x7fffffff)
> >> With the above range, we know for R1, upper 32bit can only be 0xffffffff or 0.
> >> Otherwise, the value range for R1 could be beyond [smin=0xffffffff80000000,smax=0x7fffffff].
> >>
> >> After insn #15, for the true patch, we know smin32=0 and smax32=32. With the upper 32bit 0xffffffff,
> >> then the corresponding value is [0xffffffff00000000, 0xffffffff00000020]. The range is
> >> obviously beyond the original range [smin=0xffffffff80000000,smax=0x7fffffff] and the
> >> range is not possible. So the upper 32bit must be 0, which implies smin = smin32 and
> >> smax = smax32.
> >>
> >> This patch fixed the issue by adding additional register deduction after 32-bit compare
> > __reg_deduce_mixed_bounds() is called from reg_bounds_sync() pretty
> > much after every arithmetic operation or any comparison. Is the above
> > logic true universally or only after signed comparison? If the latter,
> > then we can't just do it unconditionally inside
> > __reg_deduce_mixed_bounds().
>
> It is not just for signed extension. Some other arithmetic operation may
> produce such a range as well.

Agreed. It took me a bit to grok this more intuitively, but I think I
got there. :)

>
> >
> >> insn. If the signed 32-bit register range is non-negative then 64-bit smin is
> >> in range of [S32_MIN, S32_MAX], then the actual 64-bit smin/smax should be the same
> >> as 32-bit smin32/smax32.
> >>
> >> With this patch, iters/iter_arr_with_actual_elem_count succeeded with better register range:
> >>
> >> from 15 to 20: R0=rdonly_mem(id=7,ref_obj_id=2,sz=4) R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=31,var_off=(0x0; 0x1f)) R6=scalar(id=1,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=32,var_off=(0x0; 0x3f)) R7=scalar(id=9,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R8=scalar(id=9,smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R10=fp0 fp-8=iter_num(ref_id=2,state=active,depth=3) refs=2
> >>
> >> Acked-by: Eduard Zingerman <eddyz87@gmail.com>
> >> Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
> >> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> >> ---
> >>   kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++++++
> >>   1 file changed, 36 insertions(+)
> >>
> >> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> >> index 8da132a1ef28..46532437c4bb 100644
> >> --- a/kernel/bpf/verifier.c
> >> +++ b/kernel/bpf/verifier.c
> >> @@ -2182,6 +2182,42 @@ static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
> >>                  reg->smin_value = max_t(s64, reg->smin_value, new_smin);
> >>                  reg->smax_value = min_t(s64, reg->smax_value, new_smax);
> >>          }
> >> +
> >> +       /* Here we would like to handle a special case after sign extending load,
> >> +        * when upper bits for a 64-bit range are all 1s or all 0s.
> >> +        *
> >> +        * Upper bits are all 1s when register is in a range:
> >> +        *   [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
> >> +        * Upper bits are all 0s when register is in a range:
> >> +        *   [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
> >> +        * Together this forms are continuous range:
> >> +        *   [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
> >> +        *
> >> +        * Now, suppose that register range is in fact tighter:
> >> +        *   [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
> >> +        * Also suppose that it's 32-bit range is positive,
> >> +        * meaning that lower 32-bits of the full 64-bit register
> >> +        * are in the range:
> >> +        *   [0x0000_0000, 0x7fff_ffff] (W)
> >> +        *
> >> +        * If this happens, then any value in a range:
> >> +        *   [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
> >> +        * is smaller than a lowest bound of the range (R):
> >> +        *   0xffff_ffff_8000_0000
> >> +        * which means that upper bits of the full 64-bit register
> >> +        * can't be all 1s, when lower bits are in range (W).
> >> +        *
> >> +        * Note that:
> >> +        *  - 0xffff_ffff_8000_0000 == (s64)S32_MIN
> >> +        *  - 0x0000_0000_ffff_ffff == (s64)S32_MAX
> > ?? S32_MAX = 0x7fffffff, so should the right part be U32_MAX or the
> > left part should be 0x0000_0000_7fff_ffff ?
> Will make a change in the next revision.
> >
> >> +        * These relations are used in the conditions below.
> >> +        */
> >> +       if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
> >> +               reg->smin_value = reg->umin_value = reg->s32_min_value;
> >> +               reg->smax_value = reg->umax_value = reg->s32_max_value;
> > let's please not mix signed and unsigned 32 -> 64 bit conversions,
> > they are confusing and tricky enough in each domain individually,
> > there is no point in mixing them
> Okay, will do.
> >
> >> +               reg->var_off = tnum_intersect(reg->var_off,
> >> +                                             tnum_range(reg->smin_value, reg->smax_value));
> >> +       }
> >>   }
> >>
> >>   static void __reg_deduce_bounds(struct bpf_reg_state *reg)
> >> --
> >> 2.43.0
> >>
diff mbox series

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8da132a1ef28..46532437c4bb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2182,6 +2182,42 @@  static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
 		reg->smin_value = max_t(s64, reg->smin_value, new_smin);
 		reg->smax_value = min_t(s64, reg->smax_value, new_smax);
 	}
+
+	/* Here we would like to handle a special case after sign extending load,
+	 * when upper bits for a 64-bit range are all 1s or all 0s.
+	 *
+	 * Upper bits are all 1s when register is in a range:
+	 *   [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
+	 * Upper bits are all 0s when register is in a range:
+	 *   [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
+	 * Together this forms are continuous range:
+	 *   [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
+	 *
+	 * Now, suppose that register range is in fact tighter:
+	 *   [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
+	 * Also suppose that it's 32-bit range is positive,
+	 * meaning that lower 32-bits of the full 64-bit register
+	 * are in the range:
+	 *   [0x0000_0000, 0x7fff_ffff] (W)
+	 *
+	 * If this happens, then any value in a range:
+	 *   [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
+	 * is smaller than a lowest bound of the range (R):
+	 *   0xffff_ffff_8000_0000
+	 * which means that upper bits of the full 64-bit register
+	 * can't be all 1s, when lower bits are in range (W).
+	 *
+	 * Note that:
+	 *  - 0xffff_ffff_8000_0000 == (s64)S32_MIN
+	 *  - 0x0000_0000_ffff_ffff == (s64)S32_MAX
+	 * These relations are used in the conditions below.
+	 */
+	if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
+		reg->smin_value = reg->umin_value = reg->s32_min_value;
+		reg->smax_value = reg->umax_value = reg->s32_max_value;
+		reg->var_off = tnum_intersect(reg->var_off,
+					      tnum_range(reg->smin_value, reg->smax_value));
+	}
 }
 
 static void __reg_deduce_bounds(struct bpf_reg_state *reg)