[bpf-next,12/15] bpf: Preserve boundaries and track scalars on narrowing fill

Message ID 20231220214013.3327288-13-maxtram95@gmail.com (mailing list archive)
State New, archived
Delegated to: BPF
Series Improvements for tracking scalars in the BPF verifier

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1128 this patch: 1128
netdev/cc_maintainers success CCed 17 of 17 maintainers
netdev/build_clang success Errors and warnings before: 1145 this patch: 1145
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1155 this patch: 1155
netdev/checkpatch fail CHECK: Alignment should match open parenthesis; CHECK: spaces preferred around that '%' (ctx:WxV); ERROR: "(foo*)" should be "(foo *)"; WARNING: line length of 81 exceeds 80 columns; WARNING: line length of 88 exceeds 80 columns; WARNING: suspect code indent for conditional statements (24, 27)
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-PR fail PR summary

Commit Message

Maxim Mikityanskiy Dec. 20, 2023, 9:40 p.m. UTC
From: Maxim Mikityanskiy <maxim@isovalent.com>

When the width of a fill is smaller than the width of the preceding
spill, the information about scalar boundaries can still be preserved,
as long as it's coerced to the right width (done by coerce_reg_to_size).
Even further, if the actual value fits into the fill width, the ID can
be preserved as well for further tracking of equal scalars.

Implement the above improvements, which make narrowing fills behave the
same as narrowing spills and MOVs between registers.

Two tests are adjusted to accommodate endianness differences and to
take into account that a narrowing fill from the least significant
bits is now allowed.

reg_bounds_sync is added to coerce_reg_to_size to correctly adjust
umin/umax boundaries after the var_off truncation. For example, a
64-bit value 0xXXXXXXXX00000000, when read as 32 bits, gets umin = 0,
umax = 0xFFFFFFFF, var_off = (0x0; 0xffffffff00000000); this needs to
be synced down to umax = 0, otherwise reg_bounds_sanity_check doesn't
pass.

Signed-off-by: Maxim Mikityanskiy <maxim@isovalent.com>
---
 kernel/bpf/verifier.c                         | 20 ++++++++++---
 .../selftests/bpf/progs/verifier_spill_fill.c | 28 +++++++++++++------
 2 files changed, 35 insertions(+), 13 deletions(-)
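
For orientation, a minimal sketch of the behavior this patch enables, in
the style of the selftests in verifier_spill_fill.c (the test name is
made up, the harness macros come from bpf_misc.h, and the offset follows
the endianness-dependent pattern used in the patch below): a 64-bit
spill followed by a 32-bit fill of its least significant bits now yields
a known scalar instead of an unbounded one.

SEC("socket")
__description("Sketch: 64-bit spill, narrowing 32-bit fill keeps bounds")
__success __retval(0)
__naked void narrowing_fill_sketch(void)
{
	asm volatile ("					\
	r4 = 20;					\
	*(u64*)(r10 - 8) = r4;				\
	/* Fill only the least significant 4 bytes of	\
	 * the spilled slot. With this patch r4 stays	\
	 * a known scalar (r4=20, coerced to 32 bits);	\
	 * previously it became unbounded.		\
	 */						\
	r4 = *(u32*)(r10 - %[offset]);			\
	r0 = 0;						\
	exit;						\
"	:
	:
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	  __imm_const(offset, 8)
#else
	  __imm_const(offset, 4)
#endif
	: __clobber_all);
}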

Comments

Shung-Hsi Yu Dec. 26, 2023, 5:29 a.m. UTC | #1
On Wed, Dec 20, 2023 at 11:40:10PM +0200, Maxim Mikityanskiy wrote:
> When the width of a fill is smaller than the width of the preceding
> spill, the information about scalar boundaries can still be preserved,
> as long as it's coerced to the right width (done by coerce_reg_to_size).
> Even further, if the actual value fits into the fill width, the ID can
> be preserved as well for further tracking of equal scalars.
> 
> Implement the above improvements, which make narrowing fills behave the
> same as narrowing spills and MOVs between registers.
> 
> Two tests are adjusted to accommodate endianness differences and to
> take into account that a narrowing fill from the least significant
> bits is now allowed.
> 
> reg_bounds_sync is added to coerce_reg_to_size to correctly adjust
> umin/umax boundaries after the var_off truncation. For example, a
> 64-bit value 0xXXXXXXXX00000000, when read as 32 bits, gets umin = 0,
> umax = 0xFFFFFFFF, var_off = (0x0; 0xffffffff00000000); this needs to
> be synced down to umax = 0, otherwise reg_bounds_sanity_check doesn't
> pass.
> 
> Signed-off-by: Maxim Mikityanskiy <maxim@isovalent.com>
> ---
>  kernel/bpf/verifier.c                         | 20 ++++++++++---
>  .../selftests/bpf/progs/verifier_spill_fill.c | 28 +++++++++++++------
>  2 files changed, 35 insertions(+), 13 deletions(-)
> 
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 9b5053389739..b6e252539e52 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -4772,7 +4772,13 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
>  			if (dst_regno < 0)
>  				return 0;
>  
> -			if (!(off % BPF_REG_SIZE) && size == spill_size) {
> +			if (size <= spill_size &&
> +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> +			    !(off % BPF_REG_SIZE)
> +#else
> +			    !((off + size - spill_size) % BPF_REG_SIZE)
> +#endif

If I understand correctly, it is preferred to keep endianness-checking
macros out of verifier.c and have a helper function handle them instead.

E.g. see bpf_ctx_narrow_access_offset() from include/linux/filter.h.

> [...]
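
A minimal sketch of what such a helper could look like, modeled on the
bpf_ctx_narrow_access_offset() pattern; the name and exact form are
hypothetical, not code from this posting:

/* A narrowing fill is allowed only if it reads the part of the slot
 * that holds the spilled register's least significant bits: the start
 * of the slot on little-endian, its tail on big-endian.
 */
static bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	return !(off % BPF_REG_SIZE);
#else
	return !((off + fill_size - spill_size) % BPF_REG_SIZE);
#endif
}

The condition in check_stack_read_fixed_off() would then collapse to:

	if (size <= spill_size &&
	    bpf_stack_narrow_access_ok(off, size, spill_size)) {
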
Eduard Zingerman Jan. 4, 2024, 2:27 a.m. UTC | #2
On Wed, 2023-12-20 at 23:40 +0200, Maxim Mikityanskiy wrote:

[...]

The two tests below were added by the following commit:
ef979017b837 ("bpf: selftest: Add verifier tests for <8-byte scalar spill and refill")

As far as I understand, the original intent was to check the behavior
for stack read/write with non-matching size.
I think these tests are redundant after patch #13. Wdyt?

> diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
> index 809a09732168..de03e72e07a9 100644
> --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
> +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
> @@ -217,7 +217,7 @@ __naked void uninit_u32_from_the_stack(void)
>  
>  SEC("tc")
>  __description("Spill a u32 const scalar.  Refill as u16.  Offset to skb->data")
> -__failure __msg("invalid access to packet")
> +__success __retval(0)
>  __naked void u16_offset_to_skb_data(void)
>  {
>  	asm volatile ("					\
> @@ -225,19 +225,24 @@ __naked void u16_offset_to_skb_data(void)
>  	r3 = *(u32*)(r1 + %[__sk_buff_data_end]);	\
>  	w4 = 20;					\
>  	*(u32*)(r10 - 8) = r4;				\
> -	r4 = *(u16*)(r10 - 8);				\
> +	r4 = *(u16*)(r10 - %[offset]);			\
>  	r0 = r2;					\
> -	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
> +	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\
>  	r0 += r4;					\
> -	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
> +	/* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
>  	if r0 > r3 goto l0_%=;				\
> -	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
> +	/* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
>  	r0 = *(u32*)(r2 + 0);				\
>  l0_%=:	r0 = 0;						\
>  	exit;						\
>  "	:
>  	: __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
> -	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
> +	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
> +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> +	  __imm_const(offset, 8)
> +#else
> +	  __imm_const(offset, 6)
> +#endif
>  	: __clobber_all);
>  }
>  
> @@ -270,7 +275,7 @@ l0_%=:	r0 = 0;						\
>  }
>  
>  SEC("tc")
> -__description("Spill a u32 const scalar.  Refill as u16 from fp-6.  Offset to skb->data")
> +__description("Spill a u32 const scalar.  Refill as u16 from MSB.  Offset to skb->data")
>  __failure __msg("invalid access to packet")
>  __naked void _6_offset_to_skb_data(void)
>  {
> @@ -279,7 +284,7 @@ __naked void _6_offset_to_skb_data(void)
>  	r3 = *(u32*)(r1 + %[__sk_buff_data_end]);	\
>  	w4 = 20;					\
>  	*(u32*)(r10 - 8) = r4;				\
> -	r4 = *(u16*)(r10 - 6);				\
> +	r4 = *(u16*)(r10 - %[offset]);			\
>  	r0 = r2;					\
>  	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
>  	r0 += r4;					\
> @@ -291,7 +296,12 @@ l0_%=:	r0 = 0;						\
>  	exit;						\
>  "	:
>  	: __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
> -	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
> +	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
> +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> +	  __imm_const(offset, 6)
> +#else
> +	  __imm_const(offset, 8)
> +#endif
>  	: __clobber_all);
>  }
>
Maxim Mikityanskiy Jan. 5, 2024, 5:48 p.m. UTC | #3
On Thu, 04 Jan 2024 at 04:27:00 +0200, Eduard Zingerman wrote:
> On Wed, 2023-12-20 at 23:40 +0200, Maxim Mikityanskiy wrote:
> 
> [...]
> 
> The two tests below were added by the following commit:
> ef979017b837 ("bpf: selftest: Add verifier tests for <8-byte scalar spill and refill")
> 
> As far as I understand, the original intent was to check the behavior
> for stack read/write with non-matching size.
> I think these tests are redundant after patch #13. Wdyt?

_6_offset_to_skb_data is for sure not redundant. I don't test a partial
fill from the most significant bits in my patch 13.

u16_offset_to_skb_data is somewhat similar to
fill_32bit_after_spill_64bit, but they aren't exactly the same: the
former spills (u32)20 and fills (u16)20 (the same value), while my test
spills (u64)0xXXXXXXXX00000000 and fills (u32)0 (the most significant
bits are stripped). Maybe u16_offset_to_skb_data is redundant, but more
coverage is better than less coverage, isn't it?
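
To make the contrast concrete, a sketch that merges the core of both
tests into one function (hypothetical test name, little-endian offsets,
harness macros from bpf_misc.h assumed):

SEC("socket")
__description("Sketch: value-preserving vs. MSB-stripping narrowing fills")
__success __retval(0)
__naked void narrowing_fill_contrast_sketch(void)
{
	asm volatile ("					\
	/* u16_offset_to_skb_data: the filled value	\
	 * equals the spilled one, (u16)20 == (u32)20.	\
	 */						\
	w4 = 20;					\
	*(u32*)(r10 - 8) = r4;				\
	r4 = *(u16*)(r10 - 8);				\
	/* fill_32bit_after_spill_64bit (patch 13):	\
	 * the fill strips the non-zero upper half,	\
	 * (u32)0 != (u64)0x1234567800000000.		\
	 */						\
	r4 = 0x1234567800000000 ll;			\
	*(u64*)(r10 - 8) = r4;				\
	r4 = *(u32*)(r10 - 8);				\
	r0 = 0;						\
	exit;						\
"	::: __clobber_all);
}
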

> > diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
> > index 809a09732168..de03e72e07a9 100644
> > --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
> > +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
> > @@ -217,7 +217,7 @@ __naked void uninit_u32_from_the_stack(void)
> >  
> >  SEC("tc")
> >  __description("Spill a u32 const scalar.  Refill as u16.  Offset to skb->data")
> > -__failure __msg("invalid access to packet")
> > +__success __retval(0)
> >  __naked void u16_offset_to_skb_data(void)
> >  {
> >  	asm volatile ("					\
> > @@ -225,19 +225,24 @@ __naked void u16_offset_to_skb_data(void)
> >  	r3 = *(u32*)(r1 + %[__sk_buff_data_end]);	\
> >  	w4 = 20;					\
> >  	*(u32*)(r10 - 8) = r4;				\
> > -	r4 = *(u16*)(r10 - 8);				\
> > +	r4 = *(u16*)(r10 - %[offset]);			\
> >  	r0 = r2;					\
> > -	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
> > +	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\
> >  	r0 += r4;					\
> > -	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
> > +	/* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
> >  	if r0 > r3 goto l0_%=;				\
> > -	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
> > +	/* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
> >  	r0 = *(u32*)(r2 + 0);				\
> >  l0_%=:	r0 = 0;						\
> >  	exit;						\
> >  "	:
> >  	: __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
> > -	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
> > +	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
> > +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> > +	  __imm_const(offset, 8)
> > +#else
> > +	  __imm_const(offset, 6)
> > +#endif
> >  	: __clobber_all);
> >  }
> >  
> > @@ -270,7 +275,7 @@ l0_%=:	r0 = 0;						\
> >  }
> >  
> >  SEC("tc")
> > -__description("Spill a u32 const scalar.  Refill as u16 from fp-6.  Offset to skb->data")
> > +__description("Spill a u32 const scalar.  Refill as u16 from MSB.  Offset to skb->data")
> >  __failure __msg("invalid access to packet")
> >  __naked void _6_offset_to_skb_data(void)
> >  {
> > @@ -279,7 +284,7 @@ __naked void _6_offset_to_skb_data(void)
> >  	r3 = *(u32*)(r1 + %[__sk_buff_data_end]);	\
> >  	w4 = 20;					\
> >  	*(u32*)(r10 - 8) = r4;				\
> > -	r4 = *(u16*)(r10 - 6);				\
> > +	r4 = *(u16*)(r10 - %[offset]);			\
> >  	r0 = r2;					\
> >  	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
> >  	r0 += r4;					\
> > @@ -291,7 +296,12 @@ l0_%=:	r0 = 0;						\
> >  	exit;						\
> >  "	:
> >  	: __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
> > -	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
> > +	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
> > +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> > +	  __imm_const(offset, 6)
> > +#else
> > +	  __imm_const(offset, 8)
> > +#endif
> >  	: __clobber_all);
> >  }
> >  
Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9b5053389739..b6e252539e52 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4772,7 +4772,13 @@  static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 			if (dst_regno < 0)
 				return 0;
 
-			if (!(off % BPF_REG_SIZE) && size == spill_size) {
+			if (size <= spill_size &&
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+			    !(off % BPF_REG_SIZE)
+#else
+			    !((off + size - spill_size) % BPF_REG_SIZE)
+#endif
+			   ) {
 				/* The earlier check_reg_arg() has decided the
 				 * subreg_def for this insn.  Save it first.
 				 */
@@ -4780,6 +4786,12 @@  static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 
 				copy_register_state(&state->regs[dst_regno], reg);
 				state->regs[dst_regno].subreg_def = subreg_def;
+
+				/* Break the relation on a narrowing fill.
+				 * coerce_reg_to_size will adjust the boundaries.
+				 */
+				if (get_reg_width(reg) > size * BITS_PER_BYTE)
+					state->regs[dst_regno].id = 0;
 			} else {
 				int spill_cnt = 0, zero_cnt = 0;
 
@@ -6055,10 +6067,10 @@  static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
 	 * values are also truncated so we push 64-bit bounds into
 	 * 32-bit bounds. Above were truncated < 32-bits already.
 	 */
-	if (size < 4) {
+	if (size < 4)
 		__mark_reg32_unbounded(reg);
-		reg_bounds_sync(reg);
-	}
+
+	reg_bounds_sync(reg);
 }
 
 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 809a09732168..de03e72e07a9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -217,7 +217,7 @@  __naked void uninit_u32_from_the_stack(void)
 
 SEC("tc")
 __description("Spill a u32 const scalar.  Refill as u16.  Offset to skb->data")
-__failure __msg("invalid access to packet")
+__success __retval(0)
 __naked void u16_offset_to_skb_data(void)
 {
 	asm volatile ("					\
@@ -225,19 +225,24 @@  __naked void u16_offset_to_skb_data(void)
 	r3 = *(u32*)(r1 + %[__sk_buff_data_end]);	\
 	w4 = 20;					\
 	*(u32*)(r10 - 8) = r4;				\
-	r4 = *(u16*)(r10 - 8);				\
+	r4 = *(u16*)(r10 - %[offset]);			\
 	r0 = r2;					\
-	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\
 	r0 += r4;					\
-	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
+	/* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
 	if r0 > r3 goto l0_%=;				\
-	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
+	/* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
 	r0 = *(u32*)(r2 + 0);				\
 l0_%=:	r0 = 0;						\
 	exit;						\
 "	:
 	: __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
-	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	  __imm_const(offset, 8)
+#else
+	  __imm_const(offset, 6)
+#endif
 	: __clobber_all);
 }
 
@@ -270,7 +275,7 @@  l0_%=:	r0 = 0;						\
 }
 
 SEC("tc")
-__description("Spill a u32 const scalar.  Refill as u16 from fp-6.  Offset to skb->data")
+__description("Spill a u32 const scalar.  Refill as u16 from MSB.  Offset to skb->data")
 __failure __msg("invalid access to packet")
 __naked void _6_offset_to_skb_data(void)
 {
@@ -279,7 +284,7 @@  __naked void _6_offset_to_skb_data(void)
 	r3 = *(u32*)(r1 + %[__sk_buff_data_end]);	\
 	w4 = 20;					\
 	*(u32*)(r10 - 8) = r4;				\
-	r4 = *(u16*)(r10 - 6);				\
+	r4 = *(u16*)(r10 - %[offset]);			\
 	r0 = r2;					\
 	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
 	r0 += r4;					\
@@ -291,7 +296,12 @@  l0_%=:	r0 = 0;						\
 	exit;						\
 "	:
 	: __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
-	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+	  __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	  __imm_const(offset, 6)
+#else
+	  __imm_const(offset, 8)
+#endif
 	: __clobber_all);
 }