diff mbox series

[bpf-next,5/8] arm32, bpf: add support for 32-bit signed division

Message ID 20230905210621.1711859-6-puranjay12@gmail.com (mailing list archive)
State New
Headers show
Series arm32, bpf: add support for cpuv4 insns | expand

Commit Message

Puranjay Mohan Sept. 5, 2023, 9:06 p.m. UTC
The cpuv4 added a new BPF_SDIV instruction that does signed division.
The encoding is similar to BPF_DIV but BPF_SDIV sets offset=1.

ARM32 already supports 32-bit BPF_DIV, which can easily be extended to
support BPF_SDIV because ARM32 has the SDIV instruction. When the hardware
divide instructions are not available (not ARMv7, or no IDIVA support),
SDIV/SMOD are implemented with a function call, in the same way as DIV/MOD.

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
---
 arch/arm/net/bpf_jit_32.c | 26 ++++++++++++++++++++------
 arch/arm/net/bpf_jit_32.h |  2 ++
 2 files changed, 22 insertions(+), 6 deletions(-)

Comments

Russell King (Oracle) Sept. 5, 2023, 9:44 p.m. UTC | #1
On Tue, Sep 05, 2023 at 09:06:18PM +0000, Puranjay Mohan wrote:
> The cpuv4 added a new BPF_SDIV instruction that does signed division.
> The encoding is similar to BPF_DIV but BPF_SDIV sets offset=1.
> 
> ARM32 already supports 32-bit BPF_DIV which can be easily extended to
> support BPF_SDIV as ARM32 has the SDIV instruction. When the CPU is not
> ARM-v7, we implement that SDIV/SMOD with the function call similar to
> the implementation of DIV/MOD.
> 
> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> ---
>  arch/arm/net/bpf_jit_32.c | 26 ++++++++++++++++++++------
>  arch/arm/net/bpf_jit_32.h |  2 ++
>  2 files changed, 22 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
> index 09496203f13e..f580ecf75710 100644
> --- a/arch/arm/net/bpf_jit_32.c
> +++ b/arch/arm/net/bpf_jit_32.c
> @@ -228,6 +228,16 @@ static u32 jit_mod32(u32 dividend, u32 divisor)
>  	return dividend % divisor;
>  }
>  
> +static s32 jit_sdiv32(s32 dividend, s32 divisor)
> +{
> +	return dividend / divisor;
> +}
> +
> +static s32 jit_smod32(s32 dividend, s32 divisor)
> +{
> +	return dividend % divisor;
> +}
> +
>  static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
>  {
>  	inst |= (cond << 28);
> @@ -477,7 +487,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
>  	return to - from - 2;
>  }
>  
> -static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
> +static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
>  {
>  	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
>  	const s8 *tmp = bpf2a32[TMP_REG_1];
> @@ -485,9 +495,10 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
>  #if __LINUX_ARM_ARCH__ == 7
>  	if (elf_hwcap & HWCAP_IDIVA) {
>  		if (op == BPF_DIV)
> -			emit(ARM_UDIV(rd, rm, rn), ctx);
> +			sign ? emit(ARM_SDIV(rd, rm, rn), ctx) : emit(ARM_UDIV(rd, rm, rn), ctx);

Oh no, let's not go using the ternary operator like that. If we want
to use the ternary operator, then:

			emit(sign ? ARM_SDIV(rd, rm, rn) :
				    ARM_UDIV(rd, rm, rn), ctx);

would be _much_ better, since what is actually conditional is the value
passed to emit().

If we want to avoid the ternary operator altogether, then obviously
if() emit() else emit(), but I'd prefer my suggestion above.
>  	/* Call appropriate function */
> -	emit_mov_i(ARM_IP, op == BPF_DIV ?
> -		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
> +	if (sign)
> +		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_sdiv32 : (u32)jit_smod32, ctx);
> +	else
> +		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx);

	u32 dst;

	if (sign) {
		if (op == BPF_DIV)
			dst = (u32)jit_sdiv32;
		else
			dst = (u32)jit_smod32;
	} else {
		if (op == BPF_DIV)
			dst = (u32)jit_udiv32;
		else
			dst = (u32)jit_mod32;
	}

	emit_mov_i(ARM_IP, dst, ctx);
>  	emit_blx_r(ARM_IP, ctx);
>  
>  	/* Restore caller-saved registers from stack */
diff mbox series

Patch

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 09496203f13e..f580ecf75710 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -228,6 +228,16 @@  static u32 jit_mod32(u32 dividend, u32 divisor)
 	return dividend % divisor;
 }
 
+static s32 jit_sdiv32(s32 dividend, s32 divisor)
+{
+	return dividend / divisor;
+}
+
+static s32 jit_smod32(s32 dividend, s32 divisor)
+{
+	return dividend % divisor;
+}
+
 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
 {
 	inst |= (cond << 28);
@@ -477,7 +487,7 @@  static inline int epilogue_offset(const struct jit_ctx *ctx)
 	return to - from - 2;
 }
 
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
 {
 	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
 	const s8 *tmp = bpf2a32[TMP_REG_1];
@@ -485,9 +495,10 @@  static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 #if __LINUX_ARM_ARCH__ == 7
 	if (elf_hwcap & HWCAP_IDIVA) {
 		if (op == BPF_DIV)
-			emit(ARM_UDIV(rd, rm, rn), ctx);
+			sign ? emit(ARM_SDIV(rd, rm, rn), ctx) : emit(ARM_UDIV(rd, rm, rn), ctx);
 		else {
-			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+			sign ? emit(ARM_SDIV(ARM_IP, rm, rn), ctx) :
+				emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
 			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
 		}
 		return;
@@ -515,8 +526,11 @@  static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 	emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);
 
 	/* Call appropriate function */
-	emit_mov_i(ARM_IP, op == BPF_DIV ?
-		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+	if (sign)
+		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_sdiv32 : (u32)jit_smod32, ctx);
+	else
+		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+
 	emit_blx_r(ARM_IP, ctx);
 
 	/* Restore caller-saved registers from stack */
@@ -1546,7 +1560,7 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 			rt = src_lo;
 			break;
 		}
-		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
+		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
 		arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
 		if (!ctx->prog->aux->verifier_zext)
 			emit_a32_mov_i(dst_hi, 0, ctx);
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index 79c7373fadce..438f0e1f91a0 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -139,6 +139,7 @@ 
 #define ARM_INST_TST_I		0x03100000
 
 #define ARM_INST_UDIV		0x0730f010
+#define ARM_INST_SDIV		0x0710f010
 
 #define ARM_INST_UMULL		0x00800090
 
@@ -267,6 +268,7 @@ 
 #define ARM_TST_I(rn, imm)	_AL3_I(ARM_INST_TST, 0, rn, imm)
 
 #define ARM_UDIV(rd, rn, rm)	(ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8)
+#define ARM_SDIV(rd, rn, rm)	(ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8)
 
 #define ARM_UMULL(rd_lo, rd_hi, rn, rm)	(ARM_INST_UMULL | (rd_hi) << 16 \
 					 | (rd_lo) << 12 | (rm) << 8 | rn)