diff mbox series

[bpf-next] bpf: Fold LSH and ARSH pair to a single MOVSX for sign-extension

Message ID 20240429152036.3411628-1-xukuohai@huaweicloud.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [bpf-next] bpf: Fold LSH and ARSH pair to a single MOVSX for sign-extension | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2321 this patch: 2321
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 6 maintainers not CCed: john.fastabend@gmail.com ndesaulniers@google.com justinstitt@google.com llvm@lists.linux.dev nathan@kernel.org morbo@google.com
netdev/build_clang success Errors and warnings before: 983 this patch: 983
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2390 this patch: 2390
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 72 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Xu Kuohai April 29, 2024, 3:20 p.m. UTC
From: Xu Kuohai <xukuohai@huawei.com>

As shown in the ExpandSEXTINREG function in [1], LLVM generates an SHL and
SRA instruction pair to implement sign-extension. For x86 and arm64,
this instruction pair is folded into a single instruction, but the
LLVM BPF backend does not do such folding.

For example, the following C code:

long f(int x)
{
	return x;
}

will be compiled to:

r0 = r1
r0 <<= 0x20
r0 s>>= 0x20
exit

Since 32-bit to 64-bit sign-extension is a common case and we already
have the MOVSX instruction for sign-extension, this patch tries to fold
the 32-bit to 64-bit LSH and ARSH pair into a single MOVSX instruction.

[1] https://github.com/llvm/llvm-project/blob/4523a267829c807f3fc8fab8e5e9613985a51565/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp#L1228

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 include/linux/filter.h |  8 ++++++++
 kernel/bpf/verifier.c  | 46 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

Comments

Yonghong Song April 29, 2024, 4:32 p.m. UTC | #1
On 4/29/24 8:20 AM, Xu Kuohai wrote:
> From: Xu Kuohai <xukuohai@huawei.com>
>
> As shown in the ExpandSEXTINREG function in [1], LLVM generates SRL and
> SRA instruction pair to implement sign-extension. For x86 and arm64,
> this instruction pair will be folded to a single instruction, but the
> LLVM BPF backend does not do such folding.

With -mcpu=v4, sign-extension instructions will be generated, and the
selftest test_progs-cpuv4 already tests with -mcpu=v4. The cpu v4 support
was added in llvm18, and I hope that once llvm18 is widely available, we
might be able to make test_progs-cpuv4 the default test_progs.

So I think this optimization is not needed.

>
> For example, the following C code:
>
> long f(int x)
> {
> 	return x;
> }
>
> will be compiled to:
>
> r0 = r1
> r0 <<= 0x20
> r0 s>>= 0x20
> exit
>
> Since 32-bit to 64-bit sign-extension is a common case and we already
> have MOVSX instruction for sign-extension, this patch tries to fold
> the 32-bit to 64-bit LSH and ARSH pair to a single MOVSX instruction.
>
> [1] https://github.com/llvm/llvm-project/blob/4523a267829c807f3fc8fab8e5e9613985a51565/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp#L1228
>
> Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
> ---
>   include/linux/filter.h |  8 ++++++++
>   kernel/bpf/verifier.c  | 46 ++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 54 insertions(+)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 7a27f19bf44d..7cc90a32ed9a 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -173,6 +173,14 @@ struct ctl_table_header;
>   		.off   = 0,					\
>   		.imm   = 0 })
>   
> +#define BPF_MOV64_SEXT_REG(DST, SRC, OFF)			\
> +	((struct bpf_insn) {					\
> +		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
> +		.dst_reg = DST,					\
> +		.src_reg = SRC,					\
> +		.off   = OFF,					\
> +		.imm   = 0 })
> +
>   #define BPF_MOV32_REG(DST, SRC)					\
>   	((struct bpf_insn) {					\
>   		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 4e474ef44e9c..6bcee052d90d 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20659,6 +20659,49 @@ static int optimize_bpf_loop(struct bpf_verifier_env *env)
>   	return 0;
>   }
>   
> +static bool is_sext32(struct bpf_insn *insn1, struct bpf_insn *insn2)
> +{
> +	if (insn1->code != (BPF_ALU64 | BPF_K | BPF_LSH) || insn1->imm != 32)
> +		return false;
> +
> +	if (insn2->code != (BPF_ALU64 | BPF_K | BPF_ARSH) || insn2->imm != 32)
> +		return false;
> +
> +	if (insn1->dst_reg != insn2->dst_reg)
> +		return false;
> +
> +	return true;
> +}
> +
> +/* LLVM generates sign-extension with LSH and ARSH pair, replace it with MOVSX.
> + *
> + * Before:
> + * DST <<= 32
> + * DST s>>= 32
> + *
> + * After:
> + * DST = (s32)DST
> + */
> +static int optimize_sext32_insns(struct bpf_verifier_env *env)
> +{
> +	int i, err;
> +	int insn_cnt = env->prog->len;
> +	struct bpf_insn *insn = env->prog->insnsi;
> +
> +	for (i = 0; i < insn_cnt; i++, insn++) {
> +		if (i + 1 >= insn_cnt || !is_sext32(insn, insn + 1))
> +			continue;
> +		/* patch current insn to MOVSX */
> +		*insn = BPF_MOV64_SEXT_REG(insn->dst_reg, insn->dst_reg, 32);
> +		/* remove next insn */
> +		err = verifier_remove_insns(env, i + 1, 1);
> +		if (err)
> +			return err;
> +		insn_cnt--;
> +	}
> +	return 0;
> +}
> +
>   static void free_states(struct bpf_verifier_env *env)
>   {
>   	struct bpf_verifier_state_list *sl, *sln;
> @@ -21577,6 +21620,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
>   	if (ret == 0)
>   		ret = optimize_bpf_loop(env);
>   
> +	if (ret == 0)
> +		ret = optimize_sext32_insns(env);
> +
>   	if (is_priv) {
>   		if (ret == 0)
>   			opt_hard_wire_dead_code_branches(env);
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7a27f19bf44d..7cc90a32ed9a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -173,6 +173,14 @@  struct ctl_table_header;
 		.off   = 0,					\
 		.imm   = 0 })
 
+#define BPF_MOV64_SEXT_REG(DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 #define BPF_MOV32_REG(DST, SRC)					\
 	((struct bpf_insn) {					\
 		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4e474ef44e9c..6bcee052d90d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20659,6 +20659,49 @@  static int optimize_bpf_loop(struct bpf_verifier_env *env)
 	return 0;
 }
 
+static bool is_sext32(struct bpf_insn *insn1, struct bpf_insn *insn2)
+{
+	if (insn1->code != (BPF_ALU64 | BPF_K | BPF_LSH) || insn1->imm != 32)
+		return false;
+
+	if (insn2->code != (BPF_ALU64 | BPF_K | BPF_ARSH) || insn2->imm != 32)
+		return false;
+
+	if (insn1->dst_reg != insn2->dst_reg)
+		return false;
+
+	return true;
+}
+
+/* LLVM generates sign-extension with LSH and ARSH pair, replace it with MOVSX.
+ *
+ * Before:
+ * DST <<= 32
+ * DST s>>= 32
+ *
+ * After:
+ * DST = (s32)DST
+ */
+static int optimize_sext32_insns(struct bpf_verifier_env *env)
+{
+	int i, err;
+	int insn_cnt = env->prog->len;
+	struct bpf_insn *insn = env->prog->insnsi;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (i + 1 >= insn_cnt || !is_sext32(insn, insn + 1))
+			continue;
+		/* patch current insn to MOVSX */
+		*insn = BPF_MOV64_SEXT_REG(insn->dst_reg, insn->dst_reg, 32);
+		/* remove next insn */
+		err = verifier_remove_insns(env, i + 1, 1);
+		if (err)
+			return err;
+		insn_cnt--;
+	}
+	return 0;
+}
+
 static void free_states(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state_list *sl, *sln;
@@ -21577,6 +21620,9 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret == 0)
 		ret = optimize_bpf_loop(env);
 
+	if (ret == 0)
+		ret = optimize_sext32_insns(env);
+
 	if (is_priv) {
 		if (ret == 0)
 			opt_hard_wire_dead_code_branches(env);