
[bpf-next,v4,16/20] bpf: Add a special case for bitwise AND on range [-1, 0]

Message ID 20240711113828.3818398-4-xukuohai@huaweicloud.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Series Add return value range check for BPF LSM

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
netdev/series_format fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 818 this patch: 818
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 5 maintainers not CCed: nathan@kernel.org morbo@google.com llvm@lists.linux.dev ndesaulniers@google.com justinstitt@google.com
netdev/build_clang success Errors and warnings before: 821 this patch: 821
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 886 this patch: 886
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xu Kuohai July 11, 2024, 11:38 a.m. UTC
From: Xu Kuohai <xukuohai@huawei.com>

With the LSM return value check, the no-alu32 version of
test_libbpf_get_fd_by_id_opts is rejected by the verifier, and the log
says:

0: R1=ctx() R10=fp0
; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
0: (b7) r0 = 0                        ; R0_w=0
1: (79) r2 = *(u64 *)(r1 +0)
func 'bpf_lsm_bpf_map' arg0 has btf_id 916 type STRUCT 'bpf_map'
2: R1=ctx() R2_w=trusted_ptr_bpf_map()
; if (map != (struct bpf_map *)&data_input) @ test_libbpf_get_fd_by_id_opts.c:29
2: (18) r3 = 0xffff9742c0951a00       ; R3_w=map_ptr(map=data_input,ks=4,vs=4)
4: (5d) if r2 != r3 goto pc+4         ; R2_w=trusted_ptr_bpf_map() R3_w=map_ptr(map=data_input,ks=4,vs=4)
; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
;  @ test_libbpf_get_fd_by_id_opts.c:0
8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
9: (95) exit

Here is the C code of the prog:

SEC("lsm/bpf_map")
int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
{
    if (map != (struct bpf_map *)&data_input)
	    return 0;

    if (fmode & FMODE_WRITE)
	    return -EACCES;

    return 0;
}

It is clear that the prog can only return either 0 or -EACCES, and both
values are legal.

So why is it rejected by the verifier?

The verifier log shows that the second if statement and the return
value assignment in the prog are optimized into the bitwise operations
"r0 s>>= 63" and "r0 &= -13". The verifier correctly deduces that the
value of r0 is in the range [-1, 0] after verifying the instruction
"r0 s>>= 63". But when the verifier proceeds to verify the instruction
"r0 &= -13", it fails to deduce the correct value range of r0.

7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))

So why does the verifier fail to deduce the result of 'r0 &= -13'?

The verifier uses tnum to track values, and the two ranges "[-1, 0]" and
"[0, -1ULL]" are encoded to the same tnum. When verifying the instruction
"r0 &= -13", the verifier therefore erroneously deduces the result from
"[0, -1ULL] AND -13", which yields a range outside the expected return
range [-4095, 0].
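
To make the tnum collapse concrete, here is a minimal userspace sketch;
tnum_range() below paraphrases the helper of the same name in
kernel/bpf/tnum.c, it is not the kernel code itself:

#include <stdio.h>
#include <stdint.h>

struct tnum { uint64_t value; uint64_t mask; };

/* Userspace paraphrase of tnum_range() from kernel/bpf/tnum.c */
static struct tnum tnum_range(uint64_t min, uint64_t max)
{
	uint64_t chi = min ^ max, delta; /* bits that differ across the range */

	if (chi == 0)
		return (struct tnum){ min, 0 };   /* single value, fully known */
	if (chi >> 63)                            /* top bit differs */
		return (struct tnum){ 0, ~0ULL }; /* every bit unknown */
	delta = (2ULL << (63 - __builtin_clzll(chi))) - 1;
	return (struct tnum){ min & ~delta, delta };
}

int main(void)
{
	/* [-1, 0] as unsigned is {0xffffffffffffffff, 0}, so its tightest
	 * unsigned envelope is [0, U64_MAX] -- which encodes to the fully
	 * unknown tnum, losing the fact that only two values are possible.
	 */
	struct tnum t = tnum_range(0, ~0ULL);

	printf("value=%#llx mask=%#llx\n", /* value=0 mask=0xffffffffffffffff */
	       (unsigned long long)t.value, (unsigned long long)t.mask);
	return 0;
}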

As explained by Eduard in [0], the clang transformation that generates this
pattern is located in DAGCombiner::SimplifySelectCC() method (see [1]).

The transformation happens as part of the DAG-to-DAG rewrites
(LLVM uses several internal representations:
 - the generic optimizer uses LLVM IR, and most of the work is done
   using this representation;
 - before instruction selection, the IR is converted to a Selection DAG,
   and some optimizations are applied at this stage;
   all such optimizations are a set of pattern replacements;
 - the Selection DAG is then converted to machine code, and some
   optimizations are applied at the machine code level).

The full pattern is described as follows:

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
  // where y is has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.

For this particular test case the DAG is converted as follows:

                    .---------------- lhs         The meaning of this select_cc is:
                    |        .------- rhs         `lhs == rhs ? true value : false value`
                    |        | .----- true value
                    |        | |  .-- false value
                    v        v v  v
  (select_cc seteq (and X 2) 0 0 -13)
                          ^
->                        '---------------.
  (and (sra (sll X 62) 63)                |
       -13)                               |
                                          |
Before the pattern is applied, it checks that the second 'and' operand
has only one bit set (which is true for '2').

The pattern itself generates a logical shift left / arithmetic shift
right pair that ensures the result is either all ones (-1) or all
zeros (0). Hence, applying 'and' to the shift result and the false
value produces a correct result.
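
The arithmetic of the fold can be sanity-checked with a small userspace
sketch (the function name is made up for illustration; FMODE_WRITE is
bit 1, i.e. 0x2):

#include <stdio.h>
#include <stdint.h>

/* The select_cc fold above, spelled out: materialize "is bit 1 set?" as
 * an all-ones/all-zeros value, then AND it with the false value (-13).
 */
static int64_t select_via_shift_and(uint64_t fmode)
{
	int64_t r0 = (int64_t)(fmode << 62); /* move bit 1 up to bit 63 */

	r0 >>= 63;                           /* -1 if the bit was set, else 0 */
	return r0 & -13;                     /* -13 (-EACCES) or 0 */
}

int main(void)
{
	printf("%lld\n", (long long)select_via_shift_and(0x2)); /* -13 */
	printf("%lld\n", (long long)select_via_shift_and(0x0)); /* 0 */
	return 0;
}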

As suggested by Eduard and Andrii, this patch adds a special case for
the source or destination register of an '&=' operation being in the
range [-1, 0].

Meaning that one of the '&=' operands is either:
- all ones, in which case the counterpart is the result of the operation;
- all zeros, in which case zero is the result of the operation.

That is, the result is equivalent to the counterpart's range extended
to include 0, and the MIN and MAX values can be deduced from these
observations.
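
For the failing program this means: with r0 in [-1, 0] ANDed with the
constant -13, the deduced signed range becomes [min(-13, 0), max(-13, 0)]
= [-13, 0], well within the expected [-4095, 0]. A minimal userspace
sketch of the var_off side (tnum_union() here mirrors the helper added
by this patch):

#include <stdio.h>
#include <stdint.h>

struct tnum { uint64_t value; uint64_t mask; };

/* Same logic as the tnum_union() added below */
static struct tnum tnum_union(struct tnum a, struct tnum b)
{
	uint64_t mu = a.mask | b.mask;             /* unknown in a or b */
	uint64_t v1 = (a.value & b.value) & ~mu;   /* known 1 in both */
	uint64_t v0 = (~a.value & ~b.value) & ~mu; /* known 0 in both */

	return (struct tnum){ v1, ~(v0 | v1) };
}

int main(void)
{
	struct tnum src = { (uint64_t)-13, 0 }; /* const -13 */
	struct tnum zero = { 0, 0 };            /* const 0 */
	struct tnum res = tnum_union(src, zero);

	/* Only the bits that are 0 in both -13 and 0 stay known; the rest
	 * become unknown: value=0 mask=0xfffffffffffffff3, matching the
	 * var_off in the log above, but now paired with smin/smax [-13, 0].
	 */
	printf("value=%#llx mask=%#llx\n",
	       (unsigned long long)res.value, (unsigned long long)res.mask);
	return 0;
}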

[0] https://lore.kernel.org/bpf/e62e2971301ca7f2e9eb74fc500c520285cad8f5.camel@gmail.com/
[1] https://github.com/llvm/llvm-project/blob/4523a267829c807f3fc8fab8e5e9613985a51565/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Suggested-by: Eduard Zingerman <eddyz87@gmail.com>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 include/linux/tnum.h  |  3 ++
 kernel/bpf/tnum.c     | 25 +++++++++++++++++
 kernel/bpf/verifier.c | 64 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)

Comments

Shung-Hsi Yu July 15, 2024, 3:29 p.m. UTC | #1
Cc Harishankar Vishwanathan, Prof. Srinivas Narayana, Prof. Santosh
Nagarakatte, and Matan Shachnai, who have recently worked on
scalar*_min_max_and(); also dropping LSM/FS related mails from Cc since
the list is a bit long and I'm not sure whether the mailing list will
reject the mail due to too many emails in Cc.

On Thu, Jul 11, 2024 at 07:38:24PM GMT, Xu Kuohai wrote:
> With lsm return value check, the no-alu32 version test_libbpf_get_fd_by_id_opts
> is rejected by the verifier, and the log says:
> 
> 0: R1=ctx() R10=fp0
> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> 0: (b7) r0 = 0                        ; R0_w=0
> 1: (79) r2 = *(u64 *)(r1 +0)
> func 'bpf_lsm_bpf_map' arg0 has btf_id 916 type STRUCT 'bpf_map'
> 2: R1=ctx() R2_w=trusted_ptr_bpf_map()
> ; if (map != (struct bpf_map *)&data_input) @ test_libbpf_get_fd_by_id_opts.c:29
> 2: (18) r3 = 0xffff9742c0951a00       ; R3_w=map_ptr(map=data_input,ks=4,vs=4)
> 4: (5d) if r2 != r3 goto pc+4         ; R2_w=trusted_ptr_bpf_map() R3_w=map_ptr(map=data_input,ks=4,vs=4)
> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> 5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
> ; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
> 6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
> 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
> ;  @ test_libbpf_get_fd_by_id_opts.c:0
> 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> 9: (95) exit
> 
> And here is the C code of the prog.
> 
> SEC("lsm/bpf_map")
> int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
> {
>     if (map != (struct bpf_map *)&data_input)
> 	    return 0;
> 
>     if (fmode & FMODE_WRITE)
> 	    return -EACCES;
> 
>     return 0;
> }
> 
> It is clear that the prog can only return either 0 or -EACCESS, and both
> values are legal.
> 
> So why is it rejected by the verifier?
> 
> The verifier log shows that the second if and return value setting
> statements in the prog is optimized to bitwise operations "r0 s>>= 63"
> and "r0 &= -13". The verifier correctly deduces that the value of
> r0 is in the range [-1, 0] after verifing instruction "r0 s>>= 63".
> But when the verifier proceeds to verify instruction "r0 &= -13", it
> fails to deduce the correct value range of r0.
> 
> 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
> 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
> 
> So why the verifier fails to deduce the result of 'r0 &= -13'?
> 
> The verifier uses tnum to track values, and the two ranges "[-1, 0]" and
> "[0, -1ULL]" are encoded to the same tnum. When verifing instruction
> "r0 &= -13", the verifier erroneously deduces the result from
> "[0, -1ULL] AND -13", which is out of the expected return range
> [-4095, 0].
> 
> As explained by Eduard in [0], the clang transformation that generates this
> pattern is located in DAGCombiner::SimplifySelectCC() method (see [1]).
...
> As suggested by Eduard and Andrii, this patch makes a special case
> for source or destination register of '&=' operation being in
> range [-1, 0].
...

I've been wondering whether a more general approach is possible ever
since I saw the discussion back in April. I think I've finally got
something.

The problem we face here is that the tightest bound for the [-1, 0] case
was tracked with signed ranges, yet the BPF verifier loses knowledge of
them all too quickly in scalar*_min_max_and(); knowledge of the previous
signed ranges is not used at all to derive the outcome of the signed
ranges after BPF_AND.

	static void scalar_min_max_and(...) {
		...
		if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
			dst_reg->smin_value = dst_reg->umin_value;
			dst_reg->smax_value = dst_reg->umax_value;
		} else {
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		}
		...
	}

So it looks like it's time to be nobody[1] and try to teach the BPF
verifier how to track signed ranges when ANDing two (possibly) negative
numbers. Luckily, bitwise AND is comparatively easier to handle than
other bitwise operations:
non-negative range & non-negative range is always non-negative,
non-negative range & negative range is still always non-negative, and
negative range & negative range is always negative.

smax_value is straightforward; we can just do

	max(dst_reg->smax_value, src_reg->smax_value)

which works across all sign combinations. Technically, for non-negative &
non-negative we could use min() instead of max(), but the non-negative &
non-negative case should already be handled pretty well by the unsigned
ranges; it seems simpler to let such knowledge flow from unsigned ranges
to signed ranges during reg_bounds_sync(). Plus, using max() for
non-negative & non-negative is not wrong, just imprecise, so there is no
correctness/soundness issue here.
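
For extra confidence, here is a throwaway exhaustive check at 8-bit
width (just a sketch; the argument above is width-independent, so the
property carries over to 64-bit):

	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		/* Exhaustively verify that (a & b) <= max(a, b) for all
		 * signed 8-bit pairs, i.e. max(smax_a, smax_b) is a sound
		 * smax bound for BPF_AND. */
		for (int a = -128; a <= 127; a++) {
			for (int b = -128; b <= 127; b++) {
				signed char r = (signed char)(a & b);
				int m = a > b ? a : b;

				assert(r <= m);
			}
		}
		puts("ok: (a & b) <= max(a, b) for all 8-bit signed pairs");
		return 0;
	}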

smin_value is the trickier one, but doable with

	masked_negative(min(dst_reg->smin_value, src_reg->smin_value))

where masked_negative(v) basically clears all bits at and below the most
significant unset bit, effectively rounding a negative value down to a
negative power-of-2 value, and returning 0 for non-negative values. E.g.
for some 8-bit negative value

	masked_negative(0b11101001) == 0b11100000

This can be done with a tweaked version of "Round up to the next highest
power of 2"[2]:

	/* Invert the bits so the first unset bit can be propagated with |= */
	v = ~v;
	/* Now propagate the first (previously unset, now set) bit to the
	 * trailing positions */
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	v |= v >> 32; /* Assuming 64-bit */
	/* Propagation done, now invert again */
	v = ~v;

Again, we could technically do better by taking the sign bit into
account, but deriving smin_value this way should still be correct/sound
across different sign combinations, and overall should help us derive
[-16, 0] from "[-1, 0] AND -13", thus preventing the BPF verifier from
rejecting the program.
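
Here is a self-contained sketch of that tweak (masked_negative() is a
name from this discussion, not an existing kernel helper), checked
against the motivating case:

	#include <stdio.h>
	#include <stdint.h>

	/* Clear the most significant unset bit of sv and everything below
	 * it: rounds a negative value down to a negative power of 2, and
	 * returns 0 for any non-negative value (its unset sign bit wipes
	 * out all the lower bits). */
	static int64_t masked_negative(int64_t sv)
	{
		uint64_t v = ~(uint64_t)sv; /* first unset bit is now set */

		v |= v >> 1;                /* propagate it downwards */
		v |= v >> 2;
		v |= v >> 4;
		v |= v >> 8;
		v |= v >> 16;
		v |= v >> 32;
		return (int64_t)~v;         /* invert back */
	}

	int main(void)
	{
		/* min(-1, -13) = -13 = 0b...11110011, which rounds down to
		 * -16, while smax = max(0, -13) = 0, giving [-16, 0]. */
		printf("%lld\n", (long long)masked_negative(-13)); /* -16 */
		printf("%lld\n", (long long)masked_negative(42));  /* 0 */
		return 0;
	}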

---

Alternatively, we can employ a range-splitting trick (I think I saw this
in [3]) that allows us to take advantage of the existing tnum_and() by
splitting a signed range into two if it crosses the sign boundary (i.e.
contains both non-negative and negative values): one range will be
[smin, U64_MAX], the other will be [0, smax]. This way we get around
tnum's weakness of representing [-1, 0] as [0, U64_MAX].

	if (src_reg->smin_value < 0 && src_reg->smax_value >= 0) {
		src_lower = tnum_range(src_reg->smin_value, U64_MAX);
		src_higher = tnum_range(0, src_reg->smax_value);
	} else {
		src_lower = tnum_range(src_reg->smin_value, src_reg->smax_value);
		src_higher = tnum_range(src_reg->smin_value, src_reg->smax_value);
	}

	if (dst_reg->smin_value < 0 && dst_reg->smax_value >= 0) {
		dst_lower = tnum_range(dst_reg->smin_value, U64_MAX);
		dst_higher = tnum_range(0, dst_reg->smax_value);
	} else {
		dst_lower = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
		dst_higher = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
	}

	lower = tnum_and(src_lower, dst_lower);
	higher = tnum_and(src_higher, dst_higher);
	dst->smin_value = lower.value;
	dst->smax_value = higher.value | higher.mask;
	
---

Personally, I like the first method better as it is simpler yet still
does the job well enough. I'll work on it in the next few days and see
if it actually works.


1: https://github.com/torvalds/linux/blob/dac045fc9fa6/kernel/bpf/verifier.c#L13338
2: https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
3: https://dl.acm.org/doi/10.1145/2651360

...
Xu Kuohai July 16, 2024, 7:05 a.m. UTC | #2
On 7/15/2024 11:29 PM, Shung-Hsi Yu wrote:
> Cc Harishankar Vishwanathan, Prof. Srinivas Narayana and Prof. Santosh
> Nagarakatte, and Matan Shachnai, whom have recently work on
> scalar*_min_max_and(); also dropping LSM/FS related mails from Cc since
> it's a bit long and I'm not sure whether the mailing list will reject
> due to too many email in Cc.
> 
> On Thu, Jul 11, 2024 at 07:38:24PM GMT, Xu Kuohai wrote:
>> With lsm return value check, the no-alu32 version test_libbpf_get_fd_by_id_opts
>> is rejected by the verifier, and the log says:
>>
>> 0: R1=ctx() R10=fp0
>> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
>> 0: (b7) r0 = 0                        ; R0_w=0
>> 1: (79) r2 = *(u64 *)(r1 +0)
>> func 'bpf_lsm_bpf_map' arg0 has btf_id 916 type STRUCT 'bpf_map'
>> 2: R1=ctx() R2_w=trusted_ptr_bpf_map()
>> ; if (map != (struct bpf_map *)&data_input) @ test_libbpf_get_fd_by_id_opts.c:29
>> 2: (18) r3 = 0xffff9742c0951a00       ; R3_w=map_ptr(map=data_input,ks=4,vs=4)
>> 4: (5d) if r2 != r3 goto pc+4         ; R2_w=trusted_ptr_bpf_map() R3_w=map_ptr(map=data_input,ks=4,vs=4)
>> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
>> 5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
>> ; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
>> 6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
>> 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
>> ;  @ test_libbpf_get_fd_by_id_opts.c:0
>> 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
>> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
>> 9: (95) exit
>>
>> And here is the C code of the prog.
>>
>> SEC("lsm/bpf_map")
>> int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
>> {
>>      if (map != (struct bpf_map *)&data_input)
>> 	    return 0;
>>
>>      if (fmode & FMODE_WRITE)
>> 	    return -EACCES;
>>
>>      return 0;
>> }
>>
>> It is clear that the prog can only return either 0 or -EACCESS, and both
>> values are legal.
>>
>> So why is it rejected by the verifier?
>>
>> The verifier log shows that the second if and return value setting
>> statements in the prog is optimized to bitwise operations "r0 s>>= 63"
>> and "r0 &= -13". The verifier correctly deduces that the value of
>> r0 is in the range [-1, 0] after verifing instruction "r0 s>>= 63".
>> But when the verifier proceeds to verify instruction "r0 &= -13", it
>> fails to deduce the correct value range of r0.
>>
>> 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
>> 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
>>
>> So why the verifier fails to deduce the result of 'r0 &= -13'?
>>
>> The verifier uses tnum to track values, and the two ranges "[-1, 0]" and
>> "[0, -1ULL]" are encoded to the same tnum. When verifing instruction
>> "r0 &= -13", the verifier erroneously deduces the result from
>> "[0, -1ULL] AND -13", which is out of the expected return range
>> [-4095, 0].
>>
>> As explained by Eduard in [0], the clang transformation that generates this
>> pattern is located in DAGCombiner::SimplifySelectCC() method (see [1]).
> ...
>> As suggested by Eduard and Andrii, this patch makes a special case
>> for source or destination register of '&=' operation being in
>> range [-1, 0].
> ...
> 
> Been wonder whether it possible for a more general approach ever since I
> saw the discussion back in April. I think I've finally got something.
> 
> The problem we face here is that the tightest bound for the [-1, 0] case
> was tracked with signed ranges, yet the BPF verifier looses knowledge of
> them all too quickly in scalar*_min_max_and(); knowledge of previous
> signed ranges were not used at all to derive the outcome of signed
> ranges after BPF_AND.
> 
> 	static void scalar_min_max_and(...) {
> 		...
> 		if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
> 			dst_reg->smin_value = dst_reg->umin_value;
> 			dst_reg->smax_value = dst_reg->umax_value;
> 		} else {
> 			dst_reg->smin_value = S64_MIN;
> 			dst_reg->smax_value = S64_MAX;
> 		}
> 		...
> 	}
>

This is indeed the root cause.

> So looks like its time to be nobody[1] and try to teach BPF verifier how
> track signed ranges when ANDing two (possibly) negative numbers. Luckily
> bitwise AND is comparatively easier to do than other bitwise operations:
> non-negative range & non-negative range is always non-negative,
> non-negative range & negative range is still always non-negative, and
> negative range & negative range is always negative.
>

Right, only bitwise ANDing two negatives yields a negative result.

> smax_value is straight forwards, we can just do
> 
> 	max(dst_reg->smax_value, src_reg->smax_value)
> 
> which works across all sign combinations. Technically for non-negative &
> non-negative we can use min() instead of max(), but the non-negative &
> non-negative case should be handled pretty well by the unsigned ranges
> already; it seems simpler to let such knowledge flows from unsigned
> ranges to signed ranges during reg_bounds_sync(). Plus we are not wrong
> for non-negative & non-negative by using max(), just imprecise, so no
> correctness/soundness issue here.
>

I think this is correct, since in two's complement more '1' bits means
a larger value, regardless of sign, and bitwise AND never generates more
'1' bits.

> smin_value is the tricker one, but doable with
> 
> 	masked_negative(min(dst_reg->smin_value, src_reg->smin_value))
> 
> where masked_negative(v) basically just clear all bits after the most
> significant unset bit, effectively rounding a negative value down to a
> negative power-of-2 value, and returning 0 for non-negative values. E.g.
> for some 8-bit, negative value
> 
> 	masked_negative(0b11101001) == 0b11100000
>

Ah, it's really tricky. It seems to be the longest sequence of leading
'1' bits common to both operands. These '1' bits remain unchanged by the
bitwise AND operation, so the sequence must appear in the result, making
it the minimum possible value.
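
A concrete (made-up) 8-bit pair illustrates this:

	#include <assert.h>

	int main(void)
	{
		/* Two negative values sharing two leading '1' bits */
		signed char a = (signed char)0xe9; /* 0b11101001 = -23 */
		signed char b = (signed char)0xda; /* 0b11011010 = -38 */

		/* 0b11001000 = -56: the shared leading "11" survives the
		 * AND, so the result can never drop below 0b11000000 (-64),
		 * which is masked_negative(min(-23, -38)). */
		assert((signed char)(a & b) == (signed char)0xc8);
		return 0;
	}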

> This can be done with a tweaked version of "Round up to the next highest
> power of 2"[2],
> 
> 	/* Invert the bits so the first unset bit can be propagated with |= */
> 	v = ~v;
> 	/* Now propagate the first (previously unset, now set) bit to the
> 	 * trailing positions */
> 	v |= v >> 1;
> 	v |= v >> 2;
> 	v |= v >> 4;
> 	...
> 	v |= v >> 32; /* Assuming 64-bit */
> 	/* Propagation done, now invert again */
> 	v = ~v;
>
> Again, we technically can do better if we take sign bit into account,
> but deriving smin_value this way should still be correct/sound across
> different sign combinations, and overall should help us derived [-16, 0]
> from "[-1, 0] AND -13", thus preventing BPF verifier from rejecting the
> program.
>
> ---
> 
> Alternatively we can employ a range-splitting trick (think I saw this in
> [3]) that allow us to take advantage of existing tnum_and() by splitting
> the signed ranges into two if the range crosses the sign boundary (i.e.
> contains both non-negative and negative values), one range will be
> [smin, U64_MAX], the other will be [0, smax]. This way we get around
> tnum's weakness of representing [-1, 0] as [0, U64_MAX].
> 
> 	if (src_reg->smin_value < 0 && src_reg->smax_value >= 0) {
> 		src_lower = tnum_range(src_reg->smin_value, U64_MAX);
> 		src_higher = tnum_range(0, src_reg->smax_value);
> 	} else {
> 		src_lower = tnum_range(src_reg->smin_value, src_reg->smax_value);
> 		src_higher = tnum_range(src_reg->smin_value, src_reg->smax_value);
> 	}
> 
> 	if (dst_reg->smin_value < 0 && dst_reg->smax_value >= 0) {
> 		dst_lower = tnum_range(dst_reg->smin_value, U64_MAX);
> 		dst_higher = tnum_range(0, dst_reg->smax_value);
> 	} else {
> 		dst_lower = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
> 		dst_higher = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
> 	}
> 
> 	lower = tnum_and(src_lower, dst_lower);
> 	higher = tnum_and(src_higher, dst_higher);
> 	dst->smin_value = lower.value;
> 	dst->smax_value = higher.value | higher.mask;
>

This looks even more tricky...

> ---
> 
> Personally I like the first method better as it is simpler yet still
> does the job well enough. I'll work on that in the next few days and see
> if it actually works.
> 

This really sounds great. Thank you for the excellent work!

> 
> 1: https://github.com/torvalds/linux/blob/dac045fc9fa6/kernel/bpf/verifier.c#L13338
> 2: https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
> 3: https://dl.acm.org/doi/10.1145/2651360
> 
> ...
Shung-Hsi Yu July 16, 2024, 3:19 p.m. UTC | #3
On Tue, Jul 16, 2024 at 03:05:11PM GMT, Xu Kuohai wrote:
> On 7/15/2024 11:29 PM, Shung-Hsi Yu wrote:
> > Cc Harishankar Vishwanathan, Prof. Srinivas Narayana and Prof. Santosh
> > Nagarakatte, and Matan Shachnai, whom have recently work on
> > scalar*_min_max_and(); also dropping LSM/FS related mails from Cc since
> > it's a bit long and I'm not sure whether the mailing list will reject
> > due to too many email in Cc.
> > 
> > On Thu, Jul 11, 2024 at 07:38:24PM GMT, Xu Kuohai wrote:
> > > With lsm return value check, the no-alu32 version test_libbpf_get_fd_by_id_opts
> > > is rejected by the verifier, and the log says:
> > > 
> > > 0: R1=ctx() R10=fp0
> > > ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> > > 0: (b7) r0 = 0                        ; R0_w=0
> > > 1: (79) r2 = *(u64 *)(r1 +0)
> > > func 'bpf_lsm_bpf_map' arg0 has btf_id 916 type STRUCT 'bpf_map'
> > > 2: R1=ctx() R2_w=trusted_ptr_bpf_map()
> > > ; if (map != (struct bpf_map *)&data_input) @ test_libbpf_get_fd_by_id_opts.c:29
> > > 2: (18) r3 = 0xffff9742c0951a00       ; R3_w=map_ptr(map=data_input,ks=4,vs=4)
> > > 4: (5d) if r2 != r3 goto pc+4         ; R2_w=trusted_ptr_bpf_map() R3_w=map_ptr(map=data_input,ks=4,vs=4)
> > > ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> > > 5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
> > > ; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
> > > 6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
> > > 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
> > > ;  @ test_libbpf_get_fd_by_id_opts.c:0
> > > 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
> > > ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> > > 9: (95) exit
> > > 
> > > And here is the C code of the prog.
> > > 
> > > SEC("lsm/bpf_map")
> > > int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
> > > {
> > >      if (map != (struct bpf_map *)&data_input)
> > > 	    return 0;
> > > 
> > >      if (fmode & FMODE_WRITE)
> > > 	    return -EACCES;
> > > 
> > >      return 0;
> > > }
> > > 
> > > It is clear that the prog can only return either 0 or -EACCESS, and both
> > > values are legal.
> > > 
> > > So why is it rejected by the verifier?
> > > 
> > > The verifier log shows that the second if and return value setting
> > > statements in the prog is optimized to bitwise operations "r0 s>>= 63"
> > > and "r0 &= -13". The verifier correctly deduces that the value of
> > > r0 is in the range [-1, 0] after verifing instruction "r0 s>>= 63".
> > > But when the verifier proceeds to verify instruction "r0 &= -13", it
> > > fails to deduce the correct value range of r0.
> > > 
> > > 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
> > > 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
> > > 
> > > So why the verifier fails to deduce the result of 'r0 &= -13'?
> > > 
> > > The verifier uses tnum to track values, and the two ranges "[-1, 0]" and
> > > "[0, -1ULL]" are encoded to the same tnum. When verifing instruction
> > > "r0 &= -13", the verifier erroneously deduces the result from
> > > "[0, -1ULL] AND -13", which is out of the expected return range
> > > [-4095, 0].
> > > 
> > > As explained by Eduard in [0], the clang transformation that generates this
> > > pattern is located in DAGCombiner::SimplifySelectCC() method (see [1]).
> > ...
> > > As suggested by Eduard and Andrii, this patch makes a special case
> > > for source or destination register of '&=' operation being in
> > > range [-1, 0].
> > ...
> > 
> > Been wonder whether it possible for a more general approach ever since I
> > saw the discussion back in April. I think I've finally got something.
> > 
> > The problem we face here is that the tightest bound for the [-1, 0] case
> > was tracked with signed ranges, yet the BPF verifier looses knowledge of
> > them all too quickly in scalar*_min_max_and(); knowledge of previous
> > signed ranges were not used at all to derive the outcome of signed
> > ranges after BPF_AND.
> > 
> > 	static void scalar_min_max_and(...) {
> > 		...
> > 		if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
> > 			dst_reg->smin_value = dst_reg->umin_value;
> > 			dst_reg->smax_value = dst_reg->umax_value;
> > 		} else {
> > 			dst_reg->smin_value = S64_MIN;
> > 			dst_reg->smax_value = S64_MAX;
> > 		}
> > 		...
> > 	}
> > 
> 
> This is indeed the root cause.
> 
> > So looks like its time to be nobody[1] and try to teach BPF verifier how
> > track signed ranges when ANDing two (possibly) negative numbers. Luckily
> > bitwise AND is comparatively easier to do than other bitwise operations:
> > non-negative range & non-negative range is always non-negative,
> > non-negative range & negative range is still always non-negative, and
> > negative range & negative range is always negative.
> > 
> 
> Right, only bitwise ANDing two negatives yields to a negative result.
> 
> > smax_value is straight forwards, we can just do
> > 
> > 	max(dst_reg->smax_value, src_reg->smax_value)
> > 
> > which works across all sign combinations. Technically for non-negative &
> > non-negative we can use min() instead of max(), but the non-negative &
> > non-negative case should be handled pretty well by the unsigned ranges
> > already; it seems simpler to let such knowledge flows from unsigned
> > ranges to signed ranges during reg_bounds_sync(). Plus we are not wrong
> > for non-negative & non-negative by using max(), just imprecise, so no
> > correctness/soundness issue here.
> > 
> 
> I think this is correct, since in two's complement, more '1' bits means
> more large, regardless of sign, and bitwise AND never generates more '1'
> bits.
> 
> > smin_value is the tricker one, but doable with
> > 
> > 	masked_negative(min(dst_reg->smin_value, src_reg->smin_value))
> > 
> > where masked_negative(v) basically just clear all bits after the most
> > significant unset bit, effectively rounding a negative value down to a
> > negative power-of-2 value, and returning 0 for non-negative values. E.g.
> > for some 8-bit, negative value
> > 
> > 	masked_negative(0b11101001) == 0b11100000
> > 
> 
> Ah, it's really tricky. Seems it's the longest high '1' bits sequence
> in both operands. This '1' bits should remain unchanged by the bitwise
> AND operation. So this sequence must be in the result, making it the
> minimum possible value.
> 
> > This can be done with a tweaked version of "Round up to the next highest
> > power of 2"[2],
> > 
> > 	/* Invert the bits so the first unset bit can be propagated with |= */
> > 	v = ~v;
> > 	/* Now propagate the first (previously unset, now set) bit to the
> > 	 * trailing positions */
> > 	v |= v >> 1;
> > 	v |= v >> 2;
> > 	v |= v >> 4;
> > 	...
> > 	v |= v >> 32; /* Assuming 64-bit */
> > 	/* Propagation done, now invert again */
> > 	v = ~v;
> > 
> > Again, we technically can do better if we take sign bit into account,
> > but deriving smin_value this way should still be correct/sound across
> > different sign combinations, and overall should help us derived [-16, 0]
> > from "[-1, 0] AND -13", thus preventing BPF verifier from rejecting the
> > program.
> > 
> > ---
> > 
> > Alternatively we can employ a range-splitting trick (think I saw this in
> > [3]) that allow us to take advantage of existing tnum_and() by splitting
> > the signed ranges into two if the range crosses the sign boundary (i.e.
> > contains both non-negative and negative values), one range will be
> > [smin, U64_MAX], the other will be [0, smax]. This way we get around
> > tnum's weakness of representing [-1, 0] as [0, U64_MAX].
> > 
> > 	if (src_reg->smin_value < 0 && src_reg->smax_value >= 0) {
> > 		src_lower = tnum_range(src_reg->smin_value, U64_MAX);
> > 		src_higher = tnum_range(0, src_reg->smax_value);
> > 	} else {
> > 		src_lower = tnum_range(src_reg->smin_value, src_reg->smax_value);
> > 		src_higher = tnum_range(src_reg->smin_value, src_reg->smax_value);
> > 	}
> > 
> > 	if (dst_reg->smin_value < 0 && dst_reg->smax_value >= 0) {
> > 		dst_lower = tnum_range(dst_reg->smin_value, U64_MAX);
> > 		dst_higher = tnum_range(0, dst_reg->smax_value);
> > 	} else {
> > 		dst_lower = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
> > 		dst_higher = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
> > 	}
> > 
> > 	lower = tnum_and(src_lower, dst_lower);
> > 	higher = tnum_and(src_higher, dst_higher);
> > 	dst->smin_value = lower.value;
> > 	dst->smax_value = higher.value | higher.mask;
> 
> This looks even more tricky...

Indeed, and I think the above is still wrong because it does not
properly set smin_value to S64_MIN when needed.

> > Personally I like the first method better as it is simpler yet still
> > does the job well enough. I'll work on that in the next few days and see
> > if it actually works.
> 
> This really sounds great. Thank you for the excellent work!

Sent an RFC in a sibling thread. I think it would be better if the
patch were included as part of your series, but let's see what the
others think of it first.

Patch

diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index 3c13240077b8..5e795d728b9f 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -52,6 +52,9 @@  struct tnum tnum_mul(struct tnum a, struct tnum b);
 /* Return a tnum representing numbers satisfying both @a and @b */
 struct tnum tnum_intersect(struct tnum a, struct tnum b);
 
+/* Return a tnum representing numbers satisfying either @a or @b */
+struct tnum tnum_union(struct tnum a, struct tnum b);
+
 /* Return @a with all but the lowest @size bytes cleared */
 struct tnum tnum_cast(struct tnum a, u8 size);
 
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index 9dbc31b25e3d..8028ce06fc1e 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -150,6 +150,31 @@  struct tnum tnum_intersect(struct tnum a, struct tnum b)
 	return TNUM(v & ~mu, mu);
 }
 
+/* Each bit has 3 states: unknown, known 0, known 1. Using x to represent
+ * unknown state, the result of the union of two bits is as follows:
+ *
+ *         | x    0    1
+ *    -----+------------
+ *     x   | x    x    x
+ *     0   | x    0    x
+ *     1   | x    x    1
+ *
+ * For tnums a and b, only the bits that are known 0 in both a and b, or
+ * known 1 in both a and b, are known in the result of the union of a and b.
+ */
+struct tnum tnum_union(struct tnum a, struct tnum b)
+{
+	u64 v0, v1, mu;
+
+	/* unknown bits either in a or b */
+	mu = a.mask | b.mask;
+	/* "known 1" bits in both a and b */
+	v1 = (a.value & b.value) & ~mu;
+	/* "known 0" bits in both a and b */
+	v0 = (~a.value & ~b.value) & ~mu;
+	return TNUM(v1, ~(v0 | v1));
+}
+
 struct tnum tnum_cast(struct tnum a, u8 size)
 {
 	a.value &= (1ULL << (size * 8)) - 1;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 19ef3d27dbb7..7f4ee3b95f4e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13632,6 +13632,39 @@  static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
 		return;
 	}
 
+	/* special case: dst_reg is in range [-1, 0] */
+	if (dst_reg->s32_min_value == -1 && dst_reg->s32_max_value == 0) {
+		/* the result is equivalent to adding 0 to src_reg */
+		var32_off = tnum_union(src_reg->var_off, tnum_const(0));
+		dst_reg->var_off = tnum_with_subreg(dst_reg->var_off, var32_off);
+		/* update signed min/max to include 0 */
+		dst_reg->s32_min_value = min_t(s32, src_reg->s32_min_value, 0);
+		dst_reg->s32_max_value = max_t(s32, src_reg->s32_max_value, 0);
+		/* since we're adding 0 to src_reg and 0 is the smallest
+		 * unsigned integer, dst_reg->u32_min_value should be 0,
+	 * and dst_reg->u32_max_value should be src_reg->u32_max_value.
+		 */
+		dst_reg->u32_min_value = 0;
+		dst_reg->u32_max_value = src_reg->u32_max_value;
+		return;
+	}
+
+	/* special case: src_reg is in range [-1, 0] */
+	if (src_reg->s32_min_value == -1 && src_reg->s32_max_value == 0) {
+		/* the result is equivalent to adding 0 to dst_reg */
+		var32_off = tnum_union(dst_reg->var_off, tnum_const(0));
+		dst_reg->var_off = tnum_with_subreg(dst_reg->var_off, var32_off);
+		/* update signed min/max to include 0 */
+		dst_reg->s32_min_value = min_t(s32, dst_reg->s32_min_value, 0);
+		dst_reg->s32_max_value = max_t(s32, dst_reg->s32_max_value, 0);
+		/* since we're adding 0 to dst_reg and 0 is the smallest
+		 * unsigned integer, dst_reg->u32_min_value should be 0,
+	 * and dst_reg->u32_max_value should remain unchanged.
+		 */
+		dst_reg->u32_min_value = 0;
+		return;
+	}
+
 	/* We get our minimum from the var_off, since that's inherently
 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
 	 */
@@ -13662,6 +13695,37 @@  static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
 		return;
 	}
 
+	/* special case: dst_reg is in range [-1, 0] */
+	if (dst_reg->smin_value == -1 && dst_reg->smax_value == 0) {
+		/* the result is equivalent to adding 0 to src_reg */
+		dst_reg->var_off = tnum_union(src_reg->var_off, tnum_const(0));
+		/* update signed min/max to include 0 */
+		dst_reg->smin_value = min_t(s64, src_reg->smin_value, 0);
+		dst_reg->smax_value = max_t(s64, src_reg->smax_value, 0);
+		/* since we're adding 0 to src_reg and 0 is the smallest
+		 * unsigned integer, dst_reg->umin_value should be 0,
+	 * and dst_reg->umax_value should be src_reg->umax_value.
+		 */
+		dst_reg->umin_value = 0;
+		dst_reg->umax_value = src_reg->umax_value;
+		return;
+	}
+
+	/* special case: src_reg is in range [-1, 0] */
+	if (src_reg->smin_value == -1 && src_reg->smax_value == 0) {
+		/* the result is equivalent to adding 0 to dst_reg */
+		dst_reg->var_off = tnum_union(dst_reg->var_off, tnum_const(0));
+		/* update signed min/max to include 0 */
+		dst_reg->smin_value = min_t(s64, dst_reg->smin_value, 0);
+		dst_reg->smax_value = max_t(s64, dst_reg->smax_value, 0);
+		/* since we're adding 0 to dst_reg and 0 is the smallest
+	 * unsigned integer, dst_reg->umin_value should be 0,
+	 * and dst_reg->umax_value should remain unchanged.
+		 */
+		dst_reg->umin_value = 0;
+		return;
+	}
+
 	/* We get our minimum from the var_off, since that's inherently
 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
 	 */