diff mbox series

[bpf-next,6/8] arm32, bpf: add support for 64 bit division instruction

Message ID 20230905210621.1711859-7-puranjay12@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series arm32, bpf: add support for cpuv4 insns | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-0 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-10 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/cc_maintainers warning 1 maintainers not CCed: linux-arm-kernel@lists.infradead.org
netdev/build_clang success Errors and warnings before: 9 this patch: 9
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 9 this patch: 9
netdev/checkpatch warning WARNING: line length of 90 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline fail Was 0 now: 1

Commit Message

Puranjay Mohan Sept. 5, 2023, 9:06 p.m. UTC
ARM32 doesn't have instructions to do 64-bit/64-bit divisions. So, to
implement the following instructions:
BPF_ALU64 | BPF_DIV
BPF_ALU64 | BPF_MOD
BPF_ALU64 | BPF_SDIV
BPF_ALU64 | BPF_SMOD

We implement the above instructions by doing function calls to div64_u64()
and div64_u64_rem() for unsigned division/mod and calls to div64_s64()
for signed division/mod.

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
---
 arch/arm/net/bpf_jit_32.c | 107 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 106 insertions(+), 1 deletion(-)

Comments

Russell King (Oracle) Sept. 5, 2023, 9:50 p.m. UTC | #1
On Tue, Sep 05, 2023 at 09:06:19PM +0000, Puranjay Mohan wrote:
> +cont:
> +
> +	/* Call appropriate function */
> +	if (sign)
> +		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_sdiv64 : (u32)jit_smod64, ctx);
> +	else
> +		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv64 : (u32)jit_mod64, ctx);

Same comment as the previous patch here.

> +
> +	emit_blx_r(ARM_IP, ctx);
> +
> +	/* Save return value */
> +	if (rd[1] != ARM_R0) {
> +		emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
> +		emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
> +	}
> +
> +	/* Recover {R1, R0} from stack if it is not Rd */
> +	if (rd[1] != ARM_R0)
> +		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
> +	else
> +		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
> +
> +	/* Recover {R3, R2} from stack if it is not Rd */
> +	if (rd[1] != ARM_R2)
> +		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
> +	else
> +		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);

	if (rd[1] != ARM_R0) {
		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
	} else if (rd[1] != ARM_R2) {
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
	} else {
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 16), ctx);
	}

Hmm?
Puranjay Mohan Sept. 6, 2023, 9:29 a.m. UTC | #2
On Tue, Sep 05 2023, Russell King (Oracle) wrote:

> On Tue, Sep 05, 2023 at 09:06:19PM +0000, Puranjay Mohan wrote:
>> +cont:
>> +
>> +	/* Call appropriate function */
>> +	if (sign)
>> +		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_sdiv64 : (u32)jit_smod64, ctx);
>> +	else
>> +		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv64 : (u32)jit_mod64, ctx);
>
> Same comment as the previous patch here.

Will fix both in next version.

>
>> +
>> +	emit_blx_r(ARM_IP, ctx);
>> +
>> +	/* Save return value */
>> +	if (rd[1] != ARM_R0) {
>> +		emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
>> +		emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
>> +	}
>> +
>> +	/* Recover {R1, R0} from stack if it is not Rd */
>> +	if (rd[1] != ARM_R0)
>> +		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
>> +	else
>> +		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
>> +
>> +	/* Recover {R3, R2} from stack if it is not Rd */
>> +	if (rd[1] != ARM_R2)
>> +		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
>> +	else
>> +		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
>
> 	if (rd[1] != ARM_R0) {
> 		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
> 		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
> 	} else if (rd[1] != ARM_R2) {
> 		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
> 		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
> 	} else {
> 		emit(ARM_ADD_I(ARM_SP, ARM_SP, 16), ctx);
> 	}
>
> Hmm?

Actually, there can also be a situation where rd[1] != ARM_R0 && rd[1] != ARM_R2,
so should I do it like:

 	if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
 		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
 		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);      
 	} else if (rd[1] != ARM_R0) {
 		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
 		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
 	} else if (rd[1] != ARM_R2) {
 		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
 		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
 	} else {
 		emit(ARM_ADD_I(ARM_SP, ARM_SP, 16), ctx);
 	}

Thanks,
Puranjay
Russell King (Oracle) Sept. 6, 2023, 6:56 p.m. UTC | #3
On Wed, Sep 06, 2023 at 09:29:19AM +0000, Puranjay Mohan wrote:
> On Tue, Sep 05 2023, Russell King (Oracle) wrote:
> 
> > On Tue, Sep 05, 2023 at 09:06:19PM +0000, Puranjay Mohan wrote:
> Actually, there can also be a situation where rd[1] != ARM_R0 && rd[1] != ARM_R2,
> so should I do it like:
> 
>  	if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
>  		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
>  		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);      
>  	} else if (rd[1] != ARM_R0) {
>  		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
>  		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
>  	} else if (rd[1] != ARM_R2) {
>  		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
>  		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
>  	} else {
>  		emit(ARM_ADD_I(ARM_SP, ARM_SP, 16), ctx);
>  	}

Are you sure all four states are possible?
Puranjay Mohan Sept. 6, 2023, 7:19 p.m. UTC | #4
On Wed, Sep 06 2023, Russell King (Oracle) wrote:

> On Wed, Sep 06, 2023 at 09:29:19AM +0000, Puranjay Mohan wrote:
>> On Tue, Sep 05 2023, Russell King (Oracle) wrote:
>> 
>> > On Tue, Sep 05, 2023 at 09:06:19PM +0000, Puranjay Mohan wrote:
>> Actually, there can also be a situation where rd[1] != ARM_R0 && rd[1] != ARM_R2,
>> so should I do it like:
>> 
>>  	if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
>>  		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
>>  		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);      
>>  	} else if (rd[1] != ARM_R0) {
>>  		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
>>  		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
>>  	} else if (rd[1] != ARM_R2) {
>>  		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
>>  		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
>>  	} else {
>>  		emit(ARM_ADD_I(ARM_SP, ARM_SP, 16), ctx);
>>  	}
>
> Are you sure all four states are possible?

ohh!

I just realized that the last else will never run.
rd[1] can never be equal to both ARM_R0 and ARM_R2.
Will fix it in V3 as I already sent out the V2.

I need to learn to leave patches on the list for few days before re-spinning.

Thanks,
Puranjay
Russell King (Oracle) Sept. 6, 2023, 8:26 p.m. UTC | #5
On Wed, Sep 06, 2023 at 07:19:50PM +0000, Puranjay Mohan wrote:
> On Wed, Sep 06 2023, Russell King (Oracle) wrote:
> 
> > On Wed, Sep 06, 2023 at 09:29:19AM +0000, Puranjay Mohan wrote:
> >> On Tue, Sep 05 2023, Russell King (Oracle) wrote:
> >> 
> >> > On Tue, Sep 05, 2023 at 09:06:19PM +0000, Puranjay Mohan wrote:
> >> Actually, there can also be a situation where rd[1] != ARM_R0 && rd[1] != ARM_R2,
> >> so should I do it like:
> >> 
> >>  	if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
> >>  		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
> >>  		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);      
> >>  	} else if (rd[1] != ARM_R0) {
> >>  		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
> >>  		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
> >>  	} else if (rd[1] != ARM_R2) {
> >>  		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
> >>  		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
> >>  	} else {
> >>  		emit(ARM_ADD_I(ARM_SP, ARM_SP, 16), ctx);
> >>  	}
> >
> > Are you sure all four states are possible?
> 
> ohh!
> 
> I just realized that the last else will never run.
> rd[1] can never be equal to both ARM_R0 and ARM_R2.
> Will fix it in V3 as I already sent out the V2.
> 
> I need to learn to leave patches on the list for few days before re-spinning.

The last comment on that is you can pop r0-r3 in one go, rather than
using two instructions.
diff mbox series

Patch

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index f580ecf75710..761056db964f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -2,6 +2,7 @@ 
 /*
  * Just-In-Time compiler for eBPF filters on 32bit ARM
  *
+ * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
  * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
  * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
  */
@@ -15,6 +16,7 @@ 
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/if_vlan.h>
+#include <linux/math64.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hwcap.h>
@@ -238,6 +240,34 @@  static s32 jit_smod32(s32 dividend, s32 divisor)
 	return dividend % divisor;
 }
 
+/* Wrappers for 64-bit div/mod */
+static u64 jit_udiv64(u64 dividend, u64 divisor)
+{
+	return div64_u64(dividend, divisor);
+}
+
+static u64 jit_mod64(u64 dividend, u64 divisor)
+{
+	u64 rem;
+
+	div64_u64_rem(dividend, divisor, &rem);
+	return rem;
+}
+
+static s64 jit_sdiv64(s64 dividend, s64 divisor)
+{
+	return div64_s64(dividend, divisor);
+}
+
+static s64 jit_smod64(s64 dividend, s64 divisor)
+{
+	u64 q;
+
+	q = div64_s64(dividend, divisor);
+
+	return dividend - q * divisor;
+}
+
 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
 {
 	inst |= (cond << 28);
@@ -547,6 +577,69 @@  static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op,
 		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
 }
 
+static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
+				  u8 op, u8 sign)
+{
+	/* Push caller-saved registers on stack */
+	emit(ARM_PUSH(CALLER_MASK), ctx);
+
+	/*
+	 * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in
+	 * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack,
+	 * we can recover them later after returning from the function call.
+	 */
+	if (rm[1] != ARM_R0 || rn[1] != ARM_R2) {
+		/*
+		 * Move Rm to {R1, R0} if it is not already there.
+		 */
+		if (rm[1] != ARM_R0) {
+			if (rn[1] == ARM_R0)
+				emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+			emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
+			emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
+			if (rn[1] == ARM_R0) {
+				emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+				goto cont;
+			}
+		}
+		/*
+		 * Move Rn to {R3, R2} if it is not already there.
+		 */
+		if (rn[1] != ARM_R2) {
+			emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
+			emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
+		}
+	}
+
+cont:
+
+	/* Call appropriate function */
+	if (sign)
+		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_sdiv64 : (u32)jit_smod64, ctx);
+	else
+		emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv64 : (u32)jit_mod64, ctx);
+
+	emit_blx_r(ARM_IP, ctx);
+
+	/* Save return value */
+	if (rd[1] != ARM_R0) {
+		emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
+		emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
+	}
+
+	/* Recover {R1, R0} from stack if it is not Rd */
+	if (rd[1] != ARM_R0)
+		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+	else
+		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+
+	/* Recover {R3, R2} from stack if it is not Rd */
+	if (rd[1] != ARM_R2)
+		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+	else
+		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+}
+
 /* Is the translated BPF register on stack? */
 static bool is_stacked(s8 reg)
 {
@@ -1569,7 +1662,19 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_ALU64 | BPF_DIV | BPF_X:
 	case BPF_ALU64 | BPF_MOD | BPF_K:
 	case BPF_ALU64 | BPF_MOD | BPF_X:
-		goto notyet;
+		rd = arm_bpf_get_reg64(dst, tmp2, ctx);
+		switch (BPF_SRC(code)) {
+		case BPF_X:
+			rs = arm_bpf_get_reg64(src, tmp, ctx);
+			break;
+		case BPF_K:
+			rs = tmp;
+			emit_a32_mov_se_i64(is64, rs, imm, ctx);
+			break;
+		}
+		emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
+		arm_bpf_put_reg64(dst, rd, ctx);
+		break;
 	/* dst = dst << imm */
 	/* dst = dst >> imm */
 	/* dst = dst >> imm (signed) */