diff mbox series

[bpf-next,5/8] arm32, bpf: add support for 32-bit signed division

Message ID 20230905210621.1711859-6-puranjay12@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series arm32, bpf: add support for cpuv4 insns | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-0 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/cc_maintainers warning 1 maintainers not CCed: linux-arm-kernel@lists.infradead.org
netdev/build_clang success Errors and warnings before: 9 this patch: 9
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 9 this patch: 9
netdev/checkpatch warning WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline warning Was 1 now: 1

Commit Message

Puranjay Mohan Sept. 5, 2023, 9:06 p.m. UTC
The cpuv4 added a new BPF_SDIV instruction that does signed division.
The encoding is similar to BPF_DIV but BPF_SDIV sets offset=1.

ARM32 already supports 32-bit BPF_DIV, which can easily be extended to
support BPF_SDIV because ARM32 has the SDIV instruction. When the CPU is
not ARMv7, SDIV/SMOD are implemented with a function call, similar to
the existing implementation of DIV/MOD.

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
---
 arch/arm/net/bpf_jit_32.c | 26 ++++++++++++++++++++------
 arch/arm/net/bpf_jit_32.h |  2 ++
 2 files changed, 22 insertions(+), 6 deletions(-)

Comments

Russell King (Oracle) Sept. 5, 2023, 9:44 p.m. UTC | #1
On Tue, Sep 05, 2023 at 09:06:18PM +0000, Puranjay Mohan wrote:
> The cpuv4 added a new BPF_SDIV instruction that does signed division.
> The encoding is similar to BPF_DIV but BPF_SDIV sets offset=1.
> 
> ARM32 already supports 32-bit BPF_DIV which can be easily extended to
> support BPF_SDIV as ARM32 has the SDIV instruction. When the CPU is not
> ARM-v7, we implement that SDIV/SMOD with the function call similar to
> the implementation of DIV/MOD.
> 
> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> ---
>  arch/arm/net/bpf_jit_32.c | 26 ++++++++++++++++++++------
>  arch/arm/net/bpf_jit_32.h |  2 ++
>  2 files changed, 22 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
> index 09496203f13e..f580ecf75710 100644
> --- a/arch/arm/net/bpf_jit_32.c
> +++ b/arch/arm/net/bpf_jit_32.c
> @@ -228,6 +228,16 @@ static u32 jit_mod32(u32 dividend, u32 divisor)
>  	return dividend % divisor;
>  }
>  
> +static s32 jit_sdiv32(s32 dividend, s32 divisor)
> +{
> +	return dividend / divisor;
> +}
> +
> +static s32 jit_smod32(s32 dividend, s32 divisor)
> +{
> +	return dividend % divisor;
> +}
> +
>  static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
>  {
>  	inst |= (cond << 28);
> @@ -477,7 +487,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
>  	return to - from - 2;
>  }
>  
> -static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
> +static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
>  {
>  	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
>  	const s8 *tmp = bpf2a32[TMP_REG_1];
> @@ -485,9 +495,10 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
>  #if __LINUX_ARM_ARCH__ == 7
>  	if (elf_hwcap & HWCAP_IDIVA) {
>  		if (op == BPF_DIV)
> -			emit(ARM_UDIV(rd, rm, rn), ctx);
> +			sign ? emit(ARM_SDIV(rd, rm, rn), ctx) : emit(ARM_UDIV(rd, rm, rn), ctx);

Oh no, let's not go using the ternary operator like that. If we want
to use the ternary operator, then:

			emit(sign ? ARM_SDIV(rd, rm, rn) :
				    ARM_UDIV(rd, rm, rn), ctx);

would be _much_ better, since what is actually conditional is the value
passed to emit().

If we want to avoid the ternary operator altogether, then obviously
if() emit() else emit(), but I'd prefer my suggestion above.
>  	/* Call appropriate function */
> -	emit_mov_i(ARM_IP, op == BPF_DIV ?
> -		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
> +	if (sign)
> +		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_sdiv32 : (u32)jit_smod32, ctx);
> +	else
> +		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx);

	u32 dst;

	if (sign) {
		if (op == BPF_DIV)
			dst = (u32)jit_sdiv32;
		else
			dst = (u32)jit_smod32;
	} else {
		if (op == BPF_DIV)
			dst = (u32)jit_udiv32;
		else
			dst = (u32)jit_mod32;
	}

	emit_mov_i(ARM_IP, dst, ctx);
>  	emit_blx_r(ARM_IP, ctx);
>  
>  	/* Restore caller-saved registers from stack */
diff mbox series

Patch

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 09496203f13e..f580ecf75710 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -228,6 +228,16 @@  static u32 jit_mod32(u32 dividend, u32 divisor)
 	return dividend % divisor;
 }
 
+static s32 jit_sdiv32(s32 dividend, s32 divisor)
+{
+	return dividend / divisor;
+}
+
+static s32 jit_smod32(s32 dividend, s32 divisor)
+{
+	return dividend % divisor;
+}
+
 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
 {
 	inst |= (cond << 28);
@@ -477,7 +487,7 @@  static inline int epilogue_offset(const struct jit_ctx *ctx)
 	return to - from - 2;
 }
 
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
 {
 	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
 	const s8 *tmp = bpf2a32[TMP_REG_1];
@@ -485,9 +495,10 @@  static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 #if __LINUX_ARM_ARCH__ == 7
 	if (elf_hwcap & HWCAP_IDIVA) {
 		if (op == BPF_DIV)
-			emit(ARM_UDIV(rd, rm, rn), ctx);
+			sign ? emit(ARM_SDIV(rd, rm, rn), ctx) : emit(ARM_UDIV(rd, rm, rn), ctx);
 		else {
-			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+			sign ? emit(ARM_SDIV(ARM_IP, rm, rn), ctx) :
+				emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
 			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
 		}
 		return;
@@ -515,8 +526,11 @@  static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 	emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);
 
 	/* Call appropriate function */
-	emit_mov_i(ARM_IP, op == BPF_DIV ?
-		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+	if (sign)
+		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_sdiv32 : (u32)jit_smod32, ctx);
+	else
+		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+
 	emit_blx_r(ARM_IP, ctx);
 
 	/* Restore caller-saved registers from stack */
@@ -1546,7 +1560,7 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 			rt = src_lo;
 			break;
 		}
-		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
+		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
 		arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
 		if (!ctx->prog->aux->verifier_zext)
 			emit_a32_mov_i(dst_hi, 0, ctx);
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index 79c7373fadce..438f0e1f91a0 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -139,6 +139,7 @@ 
 #define ARM_INST_TST_I		0x03100000
 
 #define ARM_INST_UDIV		0x0730f010
+#define ARM_INST_SDIV		0x0710f010
 
 #define ARM_INST_UMULL		0x00800090
 
@@ -267,6 +268,7 @@ 
 #define ARM_TST_I(rn, imm)	_AL3_I(ARM_INST_TST, 0, rn, imm)
 
 #define ARM_UDIV(rd, rn, rm)	(ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8)
+#define ARM_SDIV(rd, rn, rm)	(ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8)
 
 #define ARM_UMULL(rd_lo, rd_hi, rn, rm)	(ARM_INST_UMULL | (rd_hi) << 16 \
 					 | (rd_lo) << 12 | (rm) << 8 | rn)