diff mbox series

[bpf-next,v2,03/15] bpf: Support new sign-extension mov insns

Message ID 20230713060734.390551-1-yhs@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Support new insns from cpu v4 | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 fail Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for veristat
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 27 this patch: 27
netdev/cc_maintainers warning 16 maintainers not CCed: tglx@linutronix.de hpa@zytor.com dsahern@kernel.org mingo@redhat.com kpsingh@kernel.org x86@kernel.org john.fastabend@gmail.com sdf@google.com netdev@vger.kernel.org martin.lau@linux.dev song@kernel.org dave.hansen@linux.intel.com davem@davemloft.net jolsa@kernel.org haoluo@google.com bp@alien8.de
netdev/build_clang fail Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 27 this patch: 27
netdev/checkpatch warning CHECK: No space is necessary after a cast WARNING: Too many leading tabs - consider code refactoring WARNING: line length of 101 exceeds 80 columns WARNING: line length of 103 exceeds 80 columns WARNING: line length of 109 exceeds 80 columns WARNING: line length of 111 exceeds 80 columns WARNING: line length of 115 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Yonghong Song July 13, 2023, 6:07 a.m. UTC
Add interpreter/jit support for new sign-extension mov insns.
The original 'MOV' insn is extended to support signed version
for both ALU and ALU64 operations. For ALU mode,
the insn->off value of 8 or 16 indicates sign-extension
from 8- or 16-bit value to 32-bit value. For ALU64 mode,
the insn->off value of 8/16/32 indicates sign-extension
from 8-, 16- or 32-bit value to 64-bit value.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 arch/x86/net/bpf_jit_comp.c |  43 ++++++++++-
 kernel/bpf/core.c           |  28 ++++++-
 kernel/bpf/verifier.c       | 150 +++++++++++++++++++++++++++++-------
 3 files changed, 190 insertions(+), 31 deletions(-)

Comments

Eduard Zingerman July 17, 2023, 1:41 a.m. UTC | #1
On Wed, 2023-07-12 at 23:07 -0700, Yonghong Song wrote:
> > Add interpreter/jit support for new sign-extension mov insns.
> > The original 'MOV' insn is extended to support signed version
> > for both ALU and ALU64 operations. For ALU mode,
> > the insn->off value of 8 or 16 indicates sign-extension
> > from 8- or 16-bit value to 32-bit value. For ALU64 mode,
> > the insn->off value of 8/16/32 indicates sign-extension
> > from 8-, 16- or 32-bit value to 64-bit value.
> > 
> > Signed-off-by: Yonghong Song <yhs@fb.com>
> > ---
> >  arch/x86/net/bpf_jit_comp.c |  43 ++++++++++-
> >  kernel/bpf/core.c           |  28 ++++++-
> >  kernel/bpf/verifier.c       | 150 +++++++++++++++++++++++++++++-------
> >  3 files changed, 190 insertions(+), 31 deletions(-)
> > 
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index addeea95f397..a740a1a6e71d 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -701,6 +701,38 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
> >  	*pprog = prog;
> >  }
> >  
> > +static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg,
> > +			   u32 src_reg)
> > +{
> > +	u8 *prog = *pprog;
> > +
> > +	if (is64) {
> > +		/* movs[b,w,l]q dst, src */
> > +		if (num_bits == 8)
> > +			EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe,
> > +			      add_2reg(0xC0, src_reg, dst_reg));
> > +		else if (num_bits == 16)
> > +			EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf,
> > +			      add_2reg(0xC0, src_reg, dst_reg));
> > +		else if (num_bits == 32)
> > +			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63,
> > +			      add_2reg(0xC0, src_reg, dst_reg));
> > +	} else {
> > +		/* movs[b,w]l dst, src */
> > +		if (num_bits == 8) {
> > +			EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe,
> > +			      add_2reg(0xC0, src_reg, dst_reg));

Nit: As far as I understand 4-126 Vol. 2B of [1]
     the 0x40 prefix (REX prefix) is optional here
     (same as implemented below for num_bits == 16).

[1] https://cdrdv2.intel.com/v1/dl/getContent/671200


> > +		} else if (num_bits == 16) {
> > +			if (is_ereg(dst_reg) || is_ereg(src_reg))
> > +				EMIT1(add_2mod(0x40, src_reg, dst_reg));
> > +			EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf,

Nit: Basing on the same manual I don't understand why 
     add_2mod(0x0f, src_reg, dst_reg) is used, '0xf' should suffice
     (but I tried it both ways and it works...).

> > +			      add_2reg(0xC0, src_reg, dst_reg));
> > +		}
> > +	}
> > +
> > +	*pprog = prog;
> > +}
> > +
> >  /* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */
> >  static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
> >  {
> > @@ -1051,9 +1083,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
> >  
> >  		case BPF_ALU64 | BPF_MOV | BPF_X:
> >  		case BPF_ALU | BPF_MOV | BPF_X:
> > -			emit_mov_reg(&prog,
> > -				     BPF_CLASS(insn->code) == BPF_ALU64,
> > -				     dst_reg, src_reg);
> > +			if (insn->off == 0)
> > +				emit_mov_reg(&prog,
> > +					     BPF_CLASS(insn->code) == BPF_ALU64,
> > +					     dst_reg, src_reg);
> > +			else
> > +				emit_movsx_reg(&prog, insn->off,
> > +					       BPF_CLASS(insn->code) == BPF_ALU64,
> > +					       dst_reg, src_reg);
> >  			break;
> >  
> >  			/* neg dst */
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index 8a1cc658789e..fe648a158c9e 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -61,6 +61,7 @@
> >  #define AX	regs[BPF_REG_AX]
> >  #define ARG1	regs[BPF_REG_ARG1]
> >  #define CTX	regs[BPF_REG_CTX]
> > +#define OFF	insn->off
> >  #define IMM	insn->imm
> >  
> >  struct bpf_mem_alloc bpf_global_ma;
> > @@ -1736,13 +1737,36 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
> >  		DST = -DST;
> >  		CONT;
> >  	ALU_MOV_X:
> > -		DST = (u32) SRC;
> > +		switch (OFF) {
> > +		case 0:
> > +			DST = (u32) SRC;
> > +			break;
> > +		case 8:
> > +			DST = (u32)(s8) SRC;
> > +			break;
> > +		case 16:
> > +			DST = (u32)(s16) SRC;
> > +			break;
> > +		}
> >  		CONT;
> >  	ALU_MOV_K:
> >  		DST = (u32) IMM;
> >  		CONT;
> >  	ALU64_MOV_X:
> > -		DST = SRC;
> > +		switch (OFF) {
> > +		case 0:
> > +			DST = SRC;
> > +			break;
> > +		case 8:
> > +			DST = (s8) SRC;
> > +			break;
> > +		case 16:
> > +			DST = (s16) SRC;
> > +			break;
> > +		case 32:
> > +			DST = (s32) SRC;
> > +			break;
> > +		}
> >  		CONT;
> >  	ALU64_MOV_K:
> >  		DST = IMM;
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index fbe4ca72d4c1..5fee9f24cb5e 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -3421,7 +3421,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
> >  			return 0;
> >  		if (opcode == BPF_MOV) {
> >  			if (BPF_SRC(insn->code) == BPF_X) {
> > -				/* dreg = sreg
> > +				/* dreg = sreg or dreg = (s8, s16, s32)sreg
> >  				 * dreg needs precision after this insn
> >  				 * sreg needs precision before this insn
> >  				 */
> > @@ -5866,6 +5866,64 @@ static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
> >  	set_sext64_default_val(reg, size);
> >  }
> >  
> > +static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
> > +{
> > +	if (size == 1) {
> > +		reg->s32_min_value = S8_MIN;
> > +		reg->s32_max_value = S8_MAX;
> > +	} else {
> > +		/* size == 2 */
> > +		reg->s32_min_value = S16_MIN;
> > +		reg->s32_max_value = S16_MAX;
> > +	}
> > +	reg->u32_min_value = 0;
> > +	reg->u32_max_value = U32_MAX;
> > +}
> > +
> > +static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
> > +{
> > +	s32 init_s32_max, init_s32_min, s32_max, s32_min;
> > +	u32 top_smax_value, top_smin_value;
> > +	u32 num_bits = size * 8;
> > +
> > +	top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
> > +	top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
> > +
> > +	if (top_smax_value != top_smin_value)
> > +		goto out;
> > +
> > +	/* find the s32_min and s32_min after sign extension */
> > +	if (size == 1) {
> > +		init_s32_max = (s8)reg->s32_max_value;
> > +		init_s32_min = (s8)reg->s32_min_value;
> > +	} else {
> > +		/* size == 2 */
> > +		init_s32_max = (s16)reg->s32_max_value;
> > +		init_s32_min = (s16)reg->s32_min_value;
> > +	}
> > +	s32_max = max(init_s32_max, init_s32_min);
> > +	s32_min = min(init_s32_max, init_s32_min);
> > +
> > +	if (s32_min >= 0 && s32_max >= 0) {
> > +		reg->s32_min_value = s32_min;
> > +		reg->s32_max_value = s32_max;
> > +		reg->u32_min_value = 0;
> > +		reg->u32_max_value = U32_MAX;
> > +		return;
> > +	}
> > +
> > +	if (s32_min < 0 && s32_max < 0) {
> > +		reg->s32_min_value = s32_min;
> > +		reg->s32_max_value = s32_max;
> > +		reg->u32_min_value = (u32)s32_max;
> > +		reg->u32_max_value = (u32)s32_min;
> > +		return;
> > +	}
> > +
> > +out:
> > +	set_sext32_default_val(reg, size);
> > +}
> > +
> >  static bool bpf_map_is_rdonly(const struct bpf_map *map)
> >  {
> >  	/* A map is considered read-only if the following condition are true:
> > @@ -13003,11 +13061,23 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
> >  	} else if (opcode == BPF_MOV) {
> >  
> >  		if (BPF_SRC(insn->code) == BPF_X) {
> > -			if (insn->imm != 0 || insn->off != 0) {
> > +			if (insn->imm != 0) {
> >  				verbose(env, "BPF_MOV uses reserved fields\n");
> >  				return -EINVAL;
> >  			}
> >  
> > +			if (BPF_CLASS(insn->code) == BPF_ALU) {
> > +				if (insn->off != 0 && insn->off != 8 && insn->off != 16) {
> > +					verbose(env, "BPF_MOV uses reserved fields\n");
> > +					return -EINVAL;
> > +				}
> > +			} else {
> > +				if (insn->off != 0 && insn->off != 8 && insn->off != 16 && insn->off != 32) {
> > +					verbose(env, "BPF_MOV uses reserved fields\n");
> > +					return -EINVAL;
> > +				}
> > +			}
> > +
> >  			/* check src operand */
> >  			err = check_reg_arg(env, insn->src_reg, SRC_OP);
> >  			if (err)
> > @@ -13031,18 +13101,32 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
> >  				       !tnum_is_const(src_reg->var_off);
> >  
> >  			if (BPF_CLASS(insn->code) == BPF_ALU64) {
> > -				/* case: R1 = R2
> > -				 * copy register state to dest reg
> > -				 */
> > -				if (need_id)
> > -					/* Assign src and dst registers the same ID
> > -					 * that will be used by find_equal_scalars()
> > -					 * to propagate min/max range.
> > +				if (insn->off == 0) {
> > +					/* case: R1 = R2
> > +					 * copy register state to dest reg
> >  					 */
> > -					src_reg->id = ++env->id_gen;
> > -				copy_register_state(dst_reg, src_reg);
> > -				dst_reg->live |= REG_LIVE_WRITTEN;
> > -				dst_reg->subreg_def = DEF_NOT_SUBREG;
> > +					if (need_id)
> > +						/* Assign src and dst registers the same ID
> > +						 * that will be used by find_equal_scalars()
> > +						 * to propagate min/max range.
> > +						 */
> > +						src_reg->id = ++env->id_gen;
> > +					copy_register_state(dst_reg, src_reg);
> > +					dst_reg->live |= REG_LIVE_WRITTEN;
> > +					dst_reg->subreg_def = DEF_NOT_SUBREG;
> > +				} else {
> > +					/* case: R1 = (s8, s16 s32)R2 */
> > +					bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
> > +
> > +					if (no_sext && need_id)
> > +						src_reg->id = ++env->id_gen;
> > +					copy_register_state(dst_reg, src_reg);
> > +					if (!no_sext)
> > +						dst_reg->id = 0;
> > +					coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
> > +					dst_reg->live |= REG_LIVE_WRITTEN;
> > +					dst_reg->subreg_def = DEF_NOT_SUBREG;
> > +				}
> >  			} else {
> >  				/* R1 = (u32) R2 */
> >  				if (is_pointer_value(env, insn->src_reg)) {
> > @@ -13051,19 +13135,33 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
> >  						insn->src_reg);
> >  					return -EACCES;
> >  				} else if (src_reg->type == SCALAR_VALUE) {
> > -					bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
> > -
> > -					if (is_src_reg_u32 && need_id)
> > -						src_reg->id = ++env->id_gen;
> > -					copy_register_state(dst_reg, src_reg);
> > -					/* Make sure ID is cleared if src_reg is not in u32 range otherwise
> > -					 * dst_reg min/max could be incorrectly
> > -					 * propagated into src_reg by find_equal_scalars()
> > -					 */
> > -					if (!is_src_reg_u32)
> > -						dst_reg->id = 0;
> > -					dst_reg->live |= REG_LIVE_WRITTEN;
> > -					dst_reg->subreg_def = env->insn_idx + 1;
> > +					if (insn->off == 0) {
> > +						bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
> > +
> > +						if (is_src_reg_u32 && need_id)
> > +							src_reg->id = ++env->id_gen;
> > +						copy_register_state(dst_reg, src_reg);
> > +						/* Make sure ID is cleared if src_reg is not in u32 range otherwise
> > +						 * dst_reg min/max could be incorrectly
> > +						 * propagated into src_reg by find_equal_scalars()
> > +						 */
> > +						if (!is_src_reg_u32)
> > +							dst_reg->id = 0;
> > +						dst_reg->live |= REG_LIVE_WRITTEN;
> > +						dst_reg->subreg_def = env->insn_idx + 1;
> > +					} else {
> > +						/* case: W1 = (s8, s16)W2 */
> > +						bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
> > +
> > +						if (no_sext && need_id)
> > +							src_reg->id = ++env->id_gen;
> > +						copy_register_state(dst_reg, src_reg);
> > +						if (!no_sext)
> > +							dst_reg->id = 0;
> > +						dst_reg->live |= REG_LIVE_WRITTEN;
> > +						dst_reg->subreg_def = env->insn_idx + 1;
> > +						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);

I tried the following test program:

{
 "testtesttest",
 .insns = {
 BPF_MOV64_IMM(BPF_REG_7, 0xffff),
 {
 .code = BPF_ALU | BPF_MOV | BPF_X,
 .dst_reg = BPF_REG_0,
 .src_reg = BPF_REG_7,
 .off = 16,
 .imm = 0,
 },
 BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 1),
 BPF_EXIT_INSN(),
 },
 .result = ACCEPT,
 .retval = 0,
},

And it produces verification log as below:

 0: R1=ctx(off=0,imm=0) R10=fp0
 0: (b7) r7 = 65535    ; R7_w=P65535
 1: (bc) w0 = w7       ; R0_w=P65535 R7_w=P65535
 2: (77) r0 >>= 1      ; R0_w=P32767
 3: (95) exit
 ...
 FAIL retval 2147483647 != 0 (run 1/1) 

Note that verifier considers R0 to be 0x7FFF at 3,
while actual value during execution is 0x7FFF'FFFF.

> > +					}
> >  				} else {
> >  					mark_reg_unknown(env, regs,
> >  							 insn->dst_reg);
Yonghong Song July 19, 2023, 1:17 a.m. UTC | #2
On 7/16/23 6:41 PM, Eduard Zingerman wrote:
> On Wed, 2023-07-12 at 23:07 -0700, Yonghong Song wrote:
>>> Add interpreter/jit support for new sign-extension mov insns.
>>> The original 'MOV' insn is extended to support signed version
>>> for both ALU and ALU64 operations. For ALU mode,
>>> the insn->off value of 8 or 16 indicates sign-extension
>>> from 8- or 16-bit value to 32-bit value. For ALU64 mode,
>>> the insn->off value of 8/16/32 indicates sign-extension
>>> from 8-, 16- or 32-bit value to 64-bit value.
>>>
>>> Signed-off-by: Yonghong Song <yhs@fb.com>
>>> ---
>>>   arch/x86/net/bpf_jit_comp.c |  43 ++++++++++-
>>>   kernel/bpf/core.c           |  28 ++++++-
>>>   kernel/bpf/verifier.c       | 150 +++++++++++++++++++++++++++++-------
>>>   3 files changed, 190 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
>>> index addeea95f397..a740a1a6e71d 100644
>>> --- a/arch/x86/net/bpf_jit_comp.c
>>> +++ b/arch/x86/net/bpf_jit_comp.c
>>> @@ -701,6 +701,38 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
>>>   	*pprog = prog;
>>>   }
>>>   
>>> +static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg,
>>> +			   u32 src_reg)
>>> +{
>>> +	u8 *prog = *pprog;
>>> +
>>> +	if (is64) {
>>> +		/* movs[b,w,l]q dst, src */
>>> +		if (num_bits == 8)
>>> +			EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe,
>>> +			      add_2reg(0xC0, src_reg, dst_reg));
>>> +		else if (num_bits == 16)
>>> +			EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf,
>>> +			      add_2reg(0xC0, src_reg, dst_reg));
>>> +		else if (num_bits == 32)
>>> +			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63,
>>> +			      add_2reg(0xC0, src_reg, dst_reg));
>>> +	} else {
>>> +		/* movs[b,w]l dst, src */
>>> +		if (num_bits == 8) {
>>> +			EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe,
>>> +			      add_2reg(0xC0, src_reg, dst_reg));
> 
> Nit: As far as I understand 4-126 Vol. 2B of [1]
>       the 0x40 prefix (REX prefix) is optional here
>       (same as implemented below for num_bits == 16).

I think 0x40 prefix at least neededif register is from R8 - R15?
I use this website to do asm/disasm experiments and did
try various combinations with first 8 and later 8 registers
and it seems correct results are generated.


> 
> [1] https://cdrdv2.intel.com/v1/dl/getContent/671200
> 
> 
>>> +		} else if (num_bits == 16) {
>>> +			if (is_ereg(dst_reg) || is_ereg(src_reg))
>>> +				EMIT1(add_2mod(0x40, src_reg, dst_reg));
>>> +			EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf,
> 
> Nit: Basing on the same manual I don't understand why
>       add_2mod(0x0f, src_reg, dst_reg) is used, '0xf' should suffice
>       (but I tried it both ways and it works...).

 From the above online assembler website.

But I will check the doc to see whether it can be simplified.

> 
>>> +			      add_2reg(0xC0, src_reg, dst_reg));
>>> +		}
>>> +	}
>>> +
>>> +	*pprog = prog;
>>> +}
>>> +
>>>   /* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */
>>>   static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
>>>   {
>>> @@ -1051,9 +1083,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
>>>   
>>>   		case BPF_ALU64 | BPF_MOV | BPF_X:
>>>   		case BPF_ALU | BPF_MOV | BPF_X:
>>> -			emit_mov_reg(&prog,
>>> -				     BPF_CLASS(insn->code) == BPF_ALU64,
>>> -				     dst_reg, src_reg);
>>> +			if (insn->off == 0)
>>> +				emit_mov_reg(&prog,
>>> +					     BPF_CLASS(insn->code) == BPF_ALU64,
>>> +					     dst_reg, src_reg);
>>> +			else
>>> +				emit_movsx_reg(&prog, insn->off,
>>> +					       BPF_CLASS(insn->code) == BPF_ALU64,
>>> +					       dst_reg, src_reg);
>>>   			break;
>>>   
>>>   			/* neg dst */
>>> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
>>> index 8a1cc658789e..fe648a158c9e 100644
>>> --- a/kernel/bpf/core.c
>>> +++ b/kernel/bpf/core.c
>>> @@ -61,6 +61,7 @@
>>>   #define AX	regs[BPF_REG_AX]
>>>   #define ARG1	regs[BPF_REG_ARG1]
>>>   #define CTX	regs[BPF_REG_CTX]
>>> +#define OFF	insn->off
>>>   #define IMM	insn->imm
>>>   
[...]
>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>> index fbe4ca72d4c1..5fee9f24cb5e 100644
>>> --- a/kernel/bpf/verifier.c
>>> +++ b/kernel/bpf/verifier.c
>>> @@ -3421,7 +3421,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
>>>   			return 0;
>>>   		if (opcode == BPF_MOV) {
>>>   			if (BPF_SRC(insn->code) == BPF_X) {
>>> -				/* dreg = sreg
>>> +				/* dreg = sreg or dreg = (s8, s16, s32)sreg
>>>   				 * dreg needs precision after this insn
>>>   				 * sreg needs precision before this insn
>>>   				 */
>>> @@ -5866,6 +5866,64 @@ static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
>>>   	set_sext64_default_val(reg, size);
>>>   }
>>>   
>>> +static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
>>> +{
>>> +	if (size == 1) {
>>> +		reg->s32_min_value = S8_MIN;
>>> +		reg->s32_max_value = S8_MAX;
>>> +	} else {
>>> +		/* size == 2 */
>>> +		reg->s32_min_value = S16_MIN;
>>> +		reg->s32_max_value = S16_MAX;
>>> +	}
>>> +	reg->u32_min_value = 0;
>>> +	reg->u32_max_value = U32_MAX;
>>> +}
>>> +
>>> +static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
>>> +{
>>> +	s32 init_s32_max, init_s32_min, s32_max, s32_min;
>>> +	u32 top_smax_value, top_smin_value;
>>> +	u32 num_bits = size * 8;
>>> +
>>> +	top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
>>> +	top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
>>> +
>>> +	if (top_smax_value != top_smin_value)
>>> +		goto out;
>>> +
>>> +	/* find the s32_min and s32_min after sign extension */
>>> +	if (size == 1) {
>>> +		init_s32_max = (s8)reg->s32_max_value;
>>> +		init_s32_min = (s8)reg->s32_min_value;
>>> +	} else {
>>> +		/* size == 2 */
>>> +		init_s32_max = (s16)reg->s32_max_value;
>>> +		init_s32_min = (s16)reg->s32_min_value;
>>> +	}
>>> +	s32_max = max(init_s32_max, init_s32_min);
>>> +	s32_min = min(init_s32_max, init_s32_min);
>>> +
>>> +	if (s32_min >= 0 && s32_max >= 0) {
>>> +		reg->s32_min_value = s32_min;
>>> +		reg->s32_max_value = s32_max;
>>> +		reg->u32_min_value = 0;
>>> +		reg->u32_max_value = U32_MAX;
>>> +		return;
>>> +	}
>>> +
>>> +	if (s32_min < 0 && s32_max < 0) {
>>> +		reg->s32_min_value = s32_min;
>>> +		reg->s32_max_value = s32_max;
>>> +		reg->u32_min_value = (u32)s32_max;
>>> +		reg->u32_max_value = (u32)s32_min;
>>> +		return;
>>> +	}
>>> +
>>> +out:
>>> +	set_sext32_default_val(reg, size);
>>> +}
>>> +
>>>   static bool bpf_map_is_rdonly(const struct bpf_map *map)
>>>   {
>>>   	/* A map is considered read-only if the following condition are true:
>>> @@ -13003,11 +13061,23 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
>>>   	} else if (opcode == BPF_MOV) {
>>>   
>>>   		if (BPF_SRC(insn->code) == BPF_X) {
>>> -			if (insn->imm != 0 || insn->off != 0) {
>>> +			if (insn->imm != 0) {
>>>   				verbose(env, "BPF_MOV uses reserved fields\n");
>>>   				return -EINVAL;
>>>   			}
>>>   
>>> +			if (BPF_CLASS(insn->code) == BPF_ALU) {
>>> +				if (insn->off != 0 && insn->off != 8 && insn->off != 16) {
>>> +					verbose(env, "BPF_MOV uses reserved fields\n");
>>> +					return -EINVAL;
>>> +				}
>>> +			} else {
>>> +				if (insn->off != 0 && insn->off != 8 && insn->off != 16 && insn->off != 32) {
>>> +					verbose(env, "BPF_MOV uses reserved fields\n");
>>> +					return -EINVAL;
>>> +				}
>>> +			}
>>> +
>>>   			/* check src operand */
>>>   			err = check_reg_arg(env, insn->src_reg, SRC_OP);
>>>   			if (err)
>>> @@ -13031,18 +13101,32 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
>>>   				       !tnum_is_const(src_reg->var_off);
>>>   
>>>   			if (BPF_CLASS(insn->code) == BPF_ALU64) {
>>> -				/* case: R1 = R2
>>> -				 * copy register state to dest reg
>>> -				 */
>>> -				if (need_id)
>>> -					/* Assign src and dst registers the same ID
>>> -					 * that will be used by find_equal_scalars()
>>> -					 * to propagate min/max range.
>>> +				if (insn->off == 0) {
>>> +					/* case: R1 = R2
>>> +					 * copy register state to dest reg
>>>   					 */
>>> -					src_reg->id = ++env->id_gen;
>>> -				copy_register_state(dst_reg, src_reg);
>>> -				dst_reg->live |= REG_LIVE_WRITTEN;
>>> -				dst_reg->subreg_def = DEF_NOT_SUBREG;
>>> +					if (need_id)
>>> +						/* Assign src and dst registers the same ID
>>> +						 * that will be used by find_equal_scalars()
>>> +						 * to propagate min/max range.
>>> +						 */
>>> +						src_reg->id = ++env->id_gen;
>>> +					copy_register_state(dst_reg, src_reg);
>>> +					dst_reg->live |= REG_LIVE_WRITTEN;
>>> +					dst_reg->subreg_def = DEF_NOT_SUBREG;
>>> +				} else {
>>> +					/* case: R1 = (s8, s16 s32)R2 */
>>> +					bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
>>> +
>>> +					if (no_sext && need_id)
>>> +						src_reg->id = ++env->id_gen;
>>> +					copy_register_state(dst_reg, src_reg);
>>> +					if (!no_sext)
>>> +						dst_reg->id = 0;
>>> +					coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
>>> +					dst_reg->live |= REG_LIVE_WRITTEN;
>>> +					dst_reg->subreg_def = DEF_NOT_SUBREG;
>>> +				}
>>>   			} else {
>>>   				/* R1 = (u32) R2 */
>>>   				if (is_pointer_value(env, insn->src_reg)) {
>>> @@ -13051,19 +13135,33 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
>>>   						insn->src_reg);
>>>   					return -EACCES;
>>>   				} else if (src_reg->type == SCALAR_VALUE) {
>>> -					bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
>>> -
>>> -					if (is_src_reg_u32 && need_id)
>>> -						src_reg->id = ++env->id_gen;
>>> -					copy_register_state(dst_reg, src_reg);
>>> -					/* Make sure ID is cleared if src_reg is not in u32 range otherwise
>>> -					 * dst_reg min/max could be incorrectly
>>> -					 * propagated into src_reg by find_equal_scalars()
>>> -					 */
>>> -					if (!is_src_reg_u32)
>>> -						dst_reg->id = 0;
>>> -					dst_reg->live |= REG_LIVE_WRITTEN;
>>> -					dst_reg->subreg_def = env->insn_idx + 1;
>>> +					if (insn->off == 0) {
>>> +						bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
>>> +
>>> +						if (is_src_reg_u32 && need_id)
>>> +							src_reg->id = ++env->id_gen;
>>> +						copy_register_state(dst_reg, src_reg);
>>> +						/* Make sure ID is cleared if src_reg is not in u32 range otherwise
>>> +						 * dst_reg min/max could be incorrectly
>>> +						 * propagated into src_reg by find_equal_scalars()
>>> +						 */
>>> +						if (!is_src_reg_u32)
>>> +							dst_reg->id = 0;
>>> +						dst_reg->live |= REG_LIVE_WRITTEN;
>>> +						dst_reg->subreg_def = env->insn_idx + 1;
>>> +					} else {
>>> +						/* case: W1 = (s8, s16)W2 */
>>> +						bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
>>> +
>>> +						if (no_sext && need_id)
>>> +							src_reg->id = ++env->id_gen;
>>> +						copy_register_state(dst_reg, src_reg);
>>> +						if (!no_sext)
>>> +							dst_reg->id = 0;
>>> +						dst_reg->live |= REG_LIVE_WRITTEN;
>>> +						dst_reg->subreg_def = env->insn_idx + 1;
>>> +						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
> 
> I tried the following test program:
> 
> {
>   "testtesttest",
>   .insns = {
>   BPF_MOV64_IMM(BPF_REG_7, 0xffff),
>   {
>   .code = BPF_ALU | BPF_MOV | BPF_X,
>   .dst_reg = BPF_REG_0,
>   .src_reg = BPF_REG_7,
>   .off = 16,
>   .imm = 0,
>   },
>   BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 1),
>   BPF_EXIT_INSN(),
>   },
>   .result = ACCEPT,
>   .retval = 0,
> },
> 
> And it produces verification log as below:
> 
>   0: R1=ctx(off=0,imm=0) R10=fp0
>   0: (b7) r7 = 65535    ; R7_w=P65535
>   1: (bc) w0 = w7       ; R0_w=P65535 R7_w=P65535
>   2: (77) r0 >>= 1      ; R0_w=P32767
>   3: (95) exit
>   ...
>   FAIL retval 2147483647 != 0 (run 1/1)
> 
> Note that verifier considers R0 to be 0x7FFF at 3,
> while actual value during execution is 0x7FFF'FFFF.

This is a verifier issue. Will fix.

> 
>>> +					}
>>>   				} else {
>>>   					mark_reg_unknown(env, regs,
>>>   							 insn->dst_reg);
>
Eduard Zingerman July 19, 2023, 12:53 p.m. UTC | #3
On Tue, 2023-07-18 at 18:17 -0700, Yonghong Song wrote:
[...]
> > > > +static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg,
> > > > +			   u32 src_reg)
> > > > +{
> > > > +	u8 *prog = *pprog;
> > > > +
> > > > +	if (is64) {
> > > > +		/* movs[b,w,l]q dst, src */
> > > > +		if (num_bits == 8)
> > > > +			EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe,
> > > > +			      add_2reg(0xC0, src_reg, dst_reg));
> > > > +		else if (num_bits == 16)
> > > > +			EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf,
> > > > +			      add_2reg(0xC0, src_reg, dst_reg));
> > > > +		else if (num_bits == 32)
> > > > +			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63,
> > > > +			      add_2reg(0xC0, src_reg, dst_reg));
> > > > +	} else {
> > > > +		/* movs[b,w]l dst, src */
> > > > +		if (num_bits == 8) {
> > > > +			EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe,
> > > > +			      add_2reg(0xC0, src_reg, dst_reg));
> > 
> > Nit: As far as I understand 4-126 Vol. 2B of [1]
> >       the 0x40 prefix (REX prefix) is optional here
> >       (same as implemented below for num_bits == 16).
> 
> I think 0x40 prefix at least neededif register is from R8 - R15?

Yes, please see below.

> I use this website to do asm/disasm experiments and did
> try various combinations with first 8 and later 8 registers
> and it seems correct results are generated.

It seems all roads lead to that web-site, I used it as well :)
Today I learned that the following could be used:

  echo 'movsx rax,ax' | as -o /dev/null -aln -msyntax=intel -mnaked-reg
  
Which opens a road to scripting experiments.

> > 
> > [1] https://cdrdv2.intel.com/v1/dl/getContent/671200
> > 
> > 
> > > > +		} else if (num_bits == 16) {
> > > > +			if (is_ereg(dst_reg) || is_ereg(src_reg))
> > > > +				EMIT1(add_2mod(0x40, src_reg, dst_reg));
> > > > +			EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf,
> > 
> > Nit: Basing on the same manual I don't understand why
> >       add_2mod(0x0f, src_reg, dst_reg) is used, '0xf' should suffice
> >       (but I tried it both ways and it works...).
> 
>  From the above online assembler website.
> 
> But I will check the doc to see whether it can be simplified.

I tried all combinations of r0..r9 for 64/32-bit destinations,
32/16/8 sources [1]:
- 0x40 based prefix is generated if any of the following is true:
  - dst is 64 bit
  - dst is ereg
  - src is ereg
  - dst is 32-bit and src is 'sil' (part of 'rsi', used for r2)
    (!) This one is surprising and web-site shows the same results.
        For example `movsx eax,sil` is encoded as `40 0F BE C6`,
        disassembling `0F BE C6` (w/o prefix) gives `movsx eax,dh`.
- opcodes:
  - 63      64-bit dst, 32-bit src
  - 0F BF   64-bit dst, 16-bit src
  - 0F BE   64-bit dst,  8-bit src
  - 0F BF   32-bit dst, 16-bit src (same as 64-bit dst)
  - 0F BE   32-bit dst,  8-bit src (same as 64-bit dst)
  
Script is at [2] (it is not particularly interesting, but in case if
you want to tweak it).

[1] https://gist.github.com/eddyz87/94b35fd89f023c43dd2480e196b28ea1
[2] https://gist.github.com/eddyz87/60991379c547df11d30fa91901862227

> > > > +			      add_2reg(0xC0, src_reg, dst_reg));
> > > > +		}
> > > > +	}
> > > > +
> > > > +	*pprog = prog;
> > > > +}
[...]
Fangrui Song July 19, 2023, 3:59 p.m. UTC | #4
On Wed, Jul 19, 2023 at 5:53 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Tue, 2023-07-18 at 18:17 -0700, Yonghong Song wrote:
> [...]
> > > > > +static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg,
> > > > > +                          u32 src_reg)
> > > > > +{
> > > > > +       u8 *prog = *pprog;
> > > > > +
> > > > > +       if (is64) {
> > > > > +               /* movs[b,w,l]q dst, src */
> > > > > +               if (num_bits == 8)
> > > > > +                       EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe,
> > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > > > > +               else if (num_bits == 16)
> > > > > +                       EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf,
> > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > > > > +               else if (num_bits == 32)
> > > > > +                       EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63,
> > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > > > > +       } else {
> > > > > +               /* movs[b,w]l dst, src */
> > > > > +               if (num_bits == 8) {
> > > > > +                       EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe,
> > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > >
> > > Nit: As far as I understand 4-126 Vol. 2B of [1]
> > >       the 0x40 prefix (REX prefix) is optional here
> > >       (same as implemented below for num_bits == 16).
> >
> > I think 0x40 prefix at least neededif register is from R8 - R15?
>
> Yes, please see below.
>
> > I use this website to do asm/disasm experiments and did
> > try various combinations with first 8 and later 8 registers
> > and it seems correct results are generated.
>
> It seems all roads lead to that web-site, I used it as well :)
> Today I learned that the following could be used:
>
>   echo 'movsx rax,ax' | as -o /dev/null -aln -msyntax=intel -mnaked-reg
>
> Which opens a road to scripting experiments.

This internal tool from llvm-project may also be useful:)

llvm-mc -triple=x86_64 -show-inst -x86-asm-syntax=intel
-output-asm-variant=1 <<< 'movsx rax, ax'

> > >
> > > [1] https://cdrdv2.intel.com/v1/dl/getContent/671200
> > >
> > >
> > > > > +               } else if (num_bits == 16) {
> > > > > +                       if (is_ereg(dst_reg) || is_ereg(src_reg))
> > > > > +                               EMIT1(add_2mod(0x40, src_reg, dst_reg));
> > > > > +                       EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf,
> > >
> > > Nit: Basing on the same manual I don't understand why
> > >       add_2mod(0x0f, src_reg, dst_reg) is used, '0xf' should suffice
> > >       (but I tried it both ways and it works...).
> >
> >  From the above online assembler website.
> >
> > But I will check the doc to see whether it can be simplified.
>
> I tried all combinations of r0..r9 for 64/32-bit destinations,
> 32/16/8 sources [1]:
> - 0x40 based prefix is generated if any of the following is true:
>   - dst is 64 bit
>   - dst is ereg
>   - src is ereg
>   - dst is 32-bit and src is 'sil' (part of 'rsi', used for r2)
>     (!) This one is surprising and web-site shows the same results.
>         For example `movsx eax,sil` is encoded as `40 0F BE C6`,
>         disassembling `0F BE C6` (w/o prefix) gives `movsx eax,dh`.
> - opcodes:
>   - 63      64-bit dst, 32-bit src
>   - 0F BF   64-bit dst, 16-bit src
>   - 0F BE   64-bit dst,  8-bit src
>   - 0F BF   32-bit dst, 16-bit src (same as 64-bit dst)
>   - 0F BE   32-bit dst,  8-bit src (same as 64-bit dst)
>
> Script is at [2] (it is not particularly interesting, but in case if
> you want to tweak it).
>
> [1] https://gist.github.com/eddyz87/94b35fd89f023c43dd2480e196b28ea1
> [2] https://gist.github.com/eddyz87/60991379c547df11d30fa91901862227
>
> > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > > > > +               }
> > > > > +       }
> > > > > +
> > > > > +       *pprog = prog;
> > > > > +}
> [...]
Eduard Zingerman July 19, 2023, 4:57 p.m. UTC | #5
On Wed, 2023-07-19 at 08:59 -0700, Fangrui Song wrote:
> On Wed, Jul 19, 2023 at 5:53 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > 
> > On Tue, 2023-07-18 at 18:17 -0700, Yonghong Song wrote:
> > [...]
> > > > > > +static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg,
> > > > > > +                          u32 src_reg)
> > > > > > +{
> > > > > > +       u8 *prog = *pprog;
> > > > > > +
> > > > > > +       if (is64) {
> > > > > > +               /* movs[b,w,l]q dst, src */
> > > > > > +               if (num_bits == 8)
> > > > > > +                       EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe,
> > > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > > > > > +               else if (num_bits == 16)
> > > > > > +                       EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf,
> > > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > > > > > +               else if (num_bits == 32)
> > > > > > +                       EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63,
> > > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > > > > > +       } else {
> > > > > > +               /* movs[b,w]l dst, src */
> > > > > > +               if (num_bits == 8) {
> > > > > > +                       EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe,
> > > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > > > 
> > > > Nit: As far as I understand 4-126 Vol. 2B of [1]
> > > >       the 0x40 prefix (REX prefix) is optional here
> > > >       (same as implemented below for num_bits == 16).
> > > 
> > > I think 0x40 prefix at least neededif register is from R8 - R15?
> > 
> > Yes, please see below.
> > 
> > > I use this website to do asm/disasm experiments and did
> > > try various combinations with first 8 and later 8 registers
> > > and it seems correct results are generated.
> > 
> > It seems all roads lead to that web-site, I used it as well :)
> > Today I learned that the following could be used:
> > 
> >   echo 'movsx rax,ax' | as -o /dev/null -aln -msyntax=intel -mnaked-reg
> > 
> > Which opens a road to scripting experiments.
> 
> This internal tool from llvm-project may also be useful:)
> 
> llvm-mc -triple=x86_64 -show-inst -x86-asm-syntax=intel
> -output-asm-variant=1 <<< 'movsx rax, ax'

Thank you, this works (with -show-encoding).

> 
> > > > 
> > > > [1] https://cdrdv2.intel.com/v1/dl/getContent/671200
> > > > 
> > > > 
> > > > > > +               } else if (num_bits == 16) {
> > > > > > +                       if (is_ereg(dst_reg) || is_ereg(src_reg))
> > > > > > +                               EMIT1(add_2mod(0x40, src_reg, dst_reg));
> > > > > > +                       EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf,
> > > > 
> > > > Nit: Basing on the same manual I don't understand why
> > > >       add_2mod(0x0f, src_reg, dst_reg) is used, '0xf' should suffice
> > > >       (but I tried it both ways and it works...).
> > > 
> > >  From the above online assembler website.
> > > 
> > > But I will check the doc to see whether it can be simplified.
> > 
> > I tried all combinations of r0..r9 for 64/32-bit destinations,
> > 32/16/8 sources [1]:
> > - 0x40 based prefix is generated if any of the following is true:
> >   - dst is 64 bit
> >   - dst is ereg
> >   - src is ereg
> >   - dst is 32-bit and src is 'sil' (part of 'rsi', used for r2)
> >     (!) This one is surprising and web-site shows the same results.
> >         For example `movsx eax,sil` is encoded as `40 0F BE C6`,
> >         disassembling `0F BE C6` (w/o prefix) gives `movsx eax,dh`.

I think I found the place in the manual that explains situation:

  3.7.2.1 Register Operands in 64-Bit Mode
  Register operands in 64-bit mode can be any of the following:
  - ...
  - 8-bit general-purpose registers: AL, BL, CL, DL, SIL, DIL, SPL, BPL,
    and R8B-R15B are available using REX prefixes;
    AL, BL, CL, DL, AH, BH, CH, DH are available without using REX prefixes.
  - ...

Vol. 1, page 3-21
https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf

> > - opcodes:
> >   - 63      64-bit dst, 32-bit src
> >   - 0F BF   64-bit dst, 16-bit src
> >   - 0F BE   64-bit dst,  8-bit src
> >   - 0F BF   32-bit dst, 16-bit src (same as 64-bit dst)
> >   - 0F BE   32-bit dst,  8-bit src (same as 64-bit dst)
> > 
> > Script is at [2] (it is not particularly interesting, but in case if
> > you want to tweak it).
> > 
> > [1] https://gist.github.com/eddyz87/94b35fd89f023c43dd2480e196b28ea1
> > [2] https://gist.github.com/eddyz87/60991379c547df11d30fa91901862227
> > 
> > > > > > +                             add_2reg(0xC0, src_reg, dst_reg));
> > > > > > +               }
> > > > > > +       }
> > > > > > +
> > > > > > +       *pprog = prog;
> > > > > > +}
> > [...]
> 
> 
>
diff mbox series

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index addeea95f397..a740a1a6e71d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -701,6 +701,38 @@  static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
 	*pprog = prog;
 }
 
+static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg,
+			   u32 src_reg)
+{
+	u8 *prog = *pprog;
+
+	if (is64) {
+		/* movs[b,w,l]q dst, src */
+		if (num_bits == 8)
+			EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe,
+			      add_2reg(0xC0, src_reg, dst_reg));
+		else if (num_bits == 16)
+			EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf,
+			      add_2reg(0xC0, src_reg, dst_reg));
+		else if (num_bits == 32)
+			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63,
+			      add_2reg(0xC0, src_reg, dst_reg));
+	} else {
+		/* movs[b,w]l dst, src */
+		if (num_bits == 8) {
+			EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe,
+			      add_2reg(0xC0, src_reg, dst_reg));
+		} else if (num_bits == 16) {
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, src_reg, dst_reg));
+			EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf,
+			      add_2reg(0xC0, src_reg, dst_reg));
+		}
+	}
+
+	*pprog = prog;
+}
+
 /* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */
 static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
 {
@@ -1051,9 +1083,14 @@  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 
 		case BPF_ALU64 | BPF_MOV | BPF_X:
 		case BPF_ALU | BPF_MOV | BPF_X:
-			emit_mov_reg(&prog,
-				     BPF_CLASS(insn->code) == BPF_ALU64,
-				     dst_reg, src_reg);
+			if (insn->off == 0)
+				emit_mov_reg(&prog,
+					     BPF_CLASS(insn->code) == BPF_ALU64,
+					     dst_reg, src_reg);
+			else
+				emit_movsx_reg(&prog, insn->off,
+					       BPF_CLASS(insn->code) == BPF_ALU64,
+					       dst_reg, src_reg);
 			break;
 
 			/* neg dst */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8a1cc658789e..fe648a158c9e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -61,6 +61,7 @@ 
 #define AX	regs[BPF_REG_AX]
 #define ARG1	regs[BPF_REG_ARG1]
 #define CTX	regs[BPF_REG_CTX]
+#define OFF	insn->off
 #define IMM	insn->imm
 
 struct bpf_mem_alloc bpf_global_ma;
@@ -1736,13 +1737,36 @@  static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 		DST = -DST;
 		CONT;
 	ALU_MOV_X:
-		DST = (u32) SRC;
+		switch (OFF) {
+		case 0:
+			DST = (u32) SRC;
+			break;
+		case 8:
+			DST = (u32)(s8) SRC;
+			break;
+		case 16:
+			DST = (u32)(s16) SRC;
+			break;
+		}
 		CONT;
 	ALU_MOV_K:
 		DST = (u32) IMM;
 		CONT;
 	ALU64_MOV_X:
-		DST = SRC;
+		switch (OFF) {
+		case 0:
+			DST = SRC;
+			break;
+		case 8:
+			DST = (s8) SRC;
+			break;
+		case 16:
+			DST = (s16) SRC;
+			break;
+		case 32:
+			DST = (s32) SRC;
+			break;
+		}
 		CONT;
 	ALU64_MOV_K:
 		DST = IMM;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fbe4ca72d4c1..5fee9f24cb5e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3421,7 +3421,7 @@  static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 			return 0;
 		if (opcode == BPF_MOV) {
 			if (BPF_SRC(insn->code) == BPF_X) {
-				/* dreg = sreg
+				/* dreg = sreg or dreg = (s8, s16, s32)sreg
 				 * dreg needs precision after this insn
 				 * sreg needs precision before this insn
 				 */
@@ -5866,6 +5866,64 @@  static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
 	set_sext64_default_val(reg, size);
 }
 
+static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
+{
+	if (size == 1) {
+		reg->s32_min_value = S8_MIN;
+		reg->s32_max_value = S8_MAX;
+	} else {
+		/* size == 2 */
+		reg->s32_min_value = S16_MIN;
+		reg->s32_max_value = S16_MAX;
+	}
+	reg->u32_min_value = 0;
+	reg->u32_max_value = U32_MAX;
+}
+
+static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
+{
+	s32 init_s32_max, init_s32_min, s32_max, s32_min;
+	u32 top_smax_value, top_smin_value;
+	u32 num_bits = size * 8;
+
+	top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
+	top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
+
+	if (top_smax_value != top_smin_value)
+		goto out;
+
+	/* find the s32_min and s32_min after sign extension */
+	if (size == 1) {
+		init_s32_max = (s8)reg->s32_max_value;
+		init_s32_min = (s8)reg->s32_min_value;
+	} else {
+		/* size == 2 */
+		init_s32_max = (s16)reg->s32_max_value;
+		init_s32_min = (s16)reg->s32_min_value;
+	}
+	s32_max = max(init_s32_max, init_s32_min);
+	s32_min = min(init_s32_max, init_s32_min);
+
+	if (s32_min >= 0 && s32_max >= 0) {
+		reg->s32_min_value = s32_min;
+		reg->s32_max_value = s32_max;
+		reg->u32_min_value = 0;
+		reg->u32_max_value = U32_MAX;
+		return;
+	}
+
+	if (s32_min < 0 && s32_max < 0) {
+		reg->s32_min_value = s32_min;
+		reg->s32_max_value = s32_max;
+		reg->u32_min_value = (u32)s32_max;
+		reg->u32_max_value = (u32)s32_min;
+		return;
+	}
+
+out:
+	set_sext32_default_val(reg, size);
+}
+
 static bool bpf_map_is_rdonly(const struct bpf_map *map)
 {
 	/* A map is considered read-only if the following condition are true:
@@ -13003,11 +13061,23 @@  static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	} else if (opcode == BPF_MOV) {
 
 		if (BPF_SRC(insn->code) == BPF_X) {
-			if (insn->imm != 0 || insn->off != 0) {
+			if (insn->imm != 0) {
 				verbose(env, "BPF_MOV uses reserved fields\n");
 				return -EINVAL;
 			}
 
+			if (BPF_CLASS(insn->code) == BPF_ALU) {
+				if (insn->off != 0 && insn->off != 8 && insn->off != 16) {
+					verbose(env, "BPF_MOV uses reserved fields\n");
+					return -EINVAL;
+				}
+			} else {
+				if (insn->off != 0 && insn->off != 8 && insn->off != 16 && insn->off != 32) {
+					verbose(env, "BPF_MOV uses reserved fields\n");
+					return -EINVAL;
+				}
+			}
+
 			/* check src operand */
 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
 			if (err)
@@ -13031,18 +13101,32 @@  static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 				       !tnum_is_const(src_reg->var_off);
 
 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
-				/* case: R1 = R2
-				 * copy register state to dest reg
-				 */
-				if (need_id)
-					/* Assign src and dst registers the same ID
-					 * that will be used by find_equal_scalars()
-					 * to propagate min/max range.
+				if (insn->off == 0) {
+					/* case: R1 = R2
+					 * copy register state to dest reg
 					 */
-					src_reg->id = ++env->id_gen;
-				copy_register_state(dst_reg, src_reg);
-				dst_reg->live |= REG_LIVE_WRITTEN;
-				dst_reg->subreg_def = DEF_NOT_SUBREG;
+					if (need_id)
+						/* Assign src and dst registers the same ID
+						 * that will be used by find_equal_scalars()
+						 * to propagate min/max range.
+						 */
+						src_reg->id = ++env->id_gen;
+					copy_register_state(dst_reg, src_reg);
+					dst_reg->live |= REG_LIVE_WRITTEN;
+					dst_reg->subreg_def = DEF_NOT_SUBREG;
+				} else {
+					/* case: R1 = (s8, s16 s32)R2 */
+					bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
+
+					if (no_sext && need_id)
+						src_reg->id = ++env->id_gen;
+					copy_register_state(dst_reg, src_reg);
+					if (!no_sext)
+						dst_reg->id = 0;
+					coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
+					dst_reg->live |= REG_LIVE_WRITTEN;
+					dst_reg->subreg_def = DEF_NOT_SUBREG;
+				}
 			} else {
 				/* R1 = (u32) R2 */
 				if (is_pointer_value(env, insn->src_reg)) {
@@ -13051,19 +13135,33 @@  static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 						insn->src_reg);
 					return -EACCES;
 				} else if (src_reg->type == SCALAR_VALUE) {
-					bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
-
-					if (is_src_reg_u32 && need_id)
-						src_reg->id = ++env->id_gen;
-					copy_register_state(dst_reg, src_reg);
-					/* Make sure ID is cleared if src_reg is not in u32 range otherwise
-					 * dst_reg min/max could be incorrectly
-					 * propagated into src_reg by find_equal_scalars()
-					 */
-					if (!is_src_reg_u32)
-						dst_reg->id = 0;
-					dst_reg->live |= REG_LIVE_WRITTEN;
-					dst_reg->subreg_def = env->insn_idx + 1;
+					if (insn->off == 0) {
+						bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
+
+						if (is_src_reg_u32 && need_id)
+							src_reg->id = ++env->id_gen;
+						copy_register_state(dst_reg, src_reg);
+						/* Make sure ID is cleared if src_reg is not in u32 range otherwise
+						 * dst_reg min/max could be incorrectly
+						 * propagated into src_reg by find_equal_scalars()
+						 */
+						if (!is_src_reg_u32)
+							dst_reg->id = 0;
+						dst_reg->live |= REG_LIVE_WRITTEN;
+						dst_reg->subreg_def = env->insn_idx + 1;
+					} else {
+						/* case: W1 = (s8, s16)W2 */
+						bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
+
+						if (no_sext && need_id)
+							src_reg->id = ++env->id_gen;
+						copy_register_state(dst_reg, src_reg);
+						if (!no_sext)
+							dst_reg->id = 0;
+						dst_reg->live |= REG_LIVE_WRITTEN;
+						dst_reg->subreg_def = env->insn_idx + 1;
+						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
+					}
 				} else {
 					mark_reg_unknown(env, regs,
 							 insn->dst_reg);