diff mbox series

[v1,06/10] powerpc/bpf: Perform complete extra passes to update addresses

Message ID c13ebeb4d5d169bda6d1d60ccaa6cc956308308d.1669881248.git.christophe.leroy@csgroup.eu (mailing list archive)
State Not Applicable
Delegated to: BPF
Headers show
Series [v1,01/10] powerpc/bpf/32: Fix Oops on tail call tests | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-14 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-15 fail Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-16 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-17 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-19 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 fail Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-21 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-22 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16

Commit Message

Christophe Leroy Dec. 1, 2022, 7:56 a.m. UTC
BPF core calls the jit compiler again for an extra pass in order
to properly set subprog addresses.

Unlike other architectures, powerpc only updates the addresses
during that extra pass. It means that holes must have been left
in the code in order to enable the maximum possible instruction
size.

In order to avoid waste of space, and waste of CPU time on powerpc
processors on which the NOP instruction is not 0-cycle, perform
two real additional passes.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/net/bpf_jit_comp.c | 85 ---------------------------------
 1 file changed, 85 deletions(-)

Comments

Naveen N. Rao Dec. 13, 2022, 10:23 a.m. UTC | #1
Christophe Leroy wrote:
> BPF core calls the jit compiler again for an extra pass in order
> to properly set subprog addresses.
> 
> Unlike other architectures, powerpc only updates the addresses
> during that extra pass. It means that holes must have been left
> in the code in order to enable the maximum possible instruction
> size.
> 
> In order avoid waste of space, and waste of CPU time on powerpc
> processors on which the NOP instruction is not 0-cycle, perform
> two real additional passes.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  arch/powerpc/net/bpf_jit_comp.c | 85 ---------------------------------
>  1 file changed, 85 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 43e634126514..8833bf23f5aa 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -23,74 +23,6 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
>  	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
>  }
>  
> -/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
> -static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
> -				   struct codegen_context *ctx, u32 *addrs)
> -{
> -	const struct bpf_insn *insn = fp->insnsi;
> -	bool func_addr_fixed;
> -	u64 func_addr;
> -	u32 tmp_idx;
> -	int i, j, ret;
> -
> -	for (i = 0; i < fp->len; i++) {
> -		/*
> -		 * During the extra pass, only the branch target addresses for
> -		 * the subprog calls need to be fixed. All other instructions
> -		 * can left untouched.
> -		 *
> -		 * The JITed image length does not change because we already
> -		 * ensure that the JITed instruction sequence for these calls
> -		 * are of fixed length by padding them with NOPs.
> -		 */
> -		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
> -		    insn[i].src_reg == BPF_PSEUDO_CALL) {
> -			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
> -						    &func_addr,
> -						    &func_addr_fixed);

I don't see you updating calls to bpf_jit_get_func_addr() in 
bpf_jit_build_body() to set extra_pass to true. Afaics, that's required 
to get the correct address to be branched to for subprogs.


- Naveen
Christophe Leroy Dec. 19, 2022, 7:06 p.m. UTC | #2
Le 13/12/2022 à 11:23, Naveen N. Rao a écrit :
> Christophe Leroy wrote:
>> BPF core calls the jit compiler again for an extra pass in order
>> to properly set subprog addresses.
>>
>> Unlike other architectures, powerpc only updates the addresses
>> during that extra pass. It means that holes must have been left
>> in the code in order to enable the maximum possible instruction
>> size.
>>
>> In order avoid waste of space, and waste of CPU time on powerpc
>> processors on which the NOP instruction is not 0-cycle, perform
>> two real additional passes.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> ---
>>  arch/powerpc/net/bpf_jit_comp.c | 85 ---------------------------------
>>  1 file changed, 85 deletions(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c 
>> b/arch/powerpc/net/bpf_jit_comp.c
>> index 43e634126514..8833bf23f5aa 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -23,74 +23,6 @@ static void bpf_jit_fill_ill_insns(void *area, 
>> unsigned int size)
>>      memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
>>  }
>>
>> -/* Fix updated addresses (for subprog calls, ldimm64, et al) during 
>> extra pass */
>> -static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
>> -                   struct codegen_context *ctx, u32 *addrs)
>> -{
>> -    const struct bpf_insn *insn = fp->insnsi;
>> -    bool func_addr_fixed;
>> -    u64 func_addr;
>> -    u32 tmp_idx;
>> -    int i, j, ret;
>> -
>> -    for (i = 0; i < fp->len; i++) {
>> -        /*
>> -         * During the extra pass, only the branch target addresses for
>> -         * the subprog calls need to be fixed. All other instructions
>> -         * can left untouched.
>> -         *
>> -         * The JITed image length does not change because we already
>> -         * ensure that the JITed instruction sequence for these calls
>> -         * are of fixed length by padding them with NOPs.
>> -         */
>> -        if (insn[i].code == (BPF_JMP | BPF_CALL) &&
>> -            insn[i].src_reg == BPF_PSEUDO_CALL) {
>> -            ret = bpf_jit_get_func_addr(fp, &insn[i], true,
>> -                            &func_addr,
>> -                            &func_addr_fixed);
> 
> I don't see you updating calls to bpf_jit_get_func_addr() in 
> bpf_jit_build_body() to set extra_pass to true. Afaics, that's required 
> to get the correct address to be branched to for subprogs.
> 

I don't understand what you mean.

My understanding is that bpf_int_jit_compile() is called twice by 
jit_subprogs(), second call sets 'extra_pass' due to jit_data->addrs = 
addrs being set at the end of the first pass.

Christophe
Naveen N. Rao Jan. 10, 2023, 8:44 a.m. UTC | #3
Christophe Leroy wrote:
> 
> 
> Le 13/12/2022 à 11:23, Naveen N. Rao a écrit :
>> Christophe Leroy wrote:
>>> BPF core calls the jit compiler again for an extra pass in order
>>> to properly set subprog addresses.
>>>
>>> Unlike other architectures, powerpc only updates the addresses
>>> during that extra pass. It means that holes must have been left
>>> in the code in order to enable the maximum possible instruction
>>> size.
>>>
>>> In order avoid waste of space, and waste of CPU time on powerpc
>>> processors on which the NOP instruction is not 0-cycle, perform
>>> two real additional passes.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>>>  arch/powerpc/net/bpf_jit_comp.c | 85 ---------------------------------
>>>  1 file changed, 85 deletions(-)
>>>
>>> diff --git a/arch/powerpc/net/bpf_jit_comp.c 
>>> b/arch/powerpc/net/bpf_jit_comp.c
>>> index 43e634126514..8833bf23f5aa 100644
>>> --- a/arch/powerpc/net/bpf_jit_comp.c
>>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>>> @@ -23,74 +23,6 @@ static void bpf_jit_fill_ill_insns(void *area, 
>>> unsigned int size)
>>>      memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
>>>  }
>>>
>>> -/* Fix updated addresses (for subprog calls, ldimm64, et al) during 
>>> extra pass */
>>> -static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
>>> -                   struct codegen_context *ctx, u32 *addrs)
>>> -{
>>> -    const struct bpf_insn *insn = fp->insnsi;
>>> -    bool func_addr_fixed;
>>> -    u64 func_addr;
>>> -    u32 tmp_idx;
>>> -    int i, j, ret;
>>> -
>>> -    for (i = 0; i < fp->len; i++) {
>>> -        /*
>>> -         * During the extra pass, only the branch target addresses for
>>> -         * the subprog calls need to be fixed. All other instructions
>>> -         * can left untouched.
>>> -         *
>>> -         * The JITed image length does not change because we already
>>> -         * ensure that the JITed instruction sequence for these calls
>>> -         * are of fixed length by padding them with NOPs.
>>> -         */
>>> -        if (insn[i].code == (BPF_JMP | BPF_CALL) &&
>>> -            insn[i].src_reg == BPF_PSEUDO_CALL) {
>>> -            ret = bpf_jit_get_func_addr(fp, &insn[i], true,
>>> -                            &func_addr,
>>> -                            &func_addr_fixed);
>> 
>> I don't see you updating calls to bpf_jit_get_func_addr() in 
>> bpf_jit_build_body() to set extra_pass to true. Afaics, that's required 
>> to get the correct address to be branched to for subprogs.
>> 
> 
> I don't understand what you mean.

I am referring to the third parameter we pass to 
bpf_jit_get_func_addr().

In bpf_jit_build_body(), we do:

		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;

			ret = bpf_jit_get_func_addr(fp, &insn[i], false,
						    &func_addr, &func_addr_fixed);


The third parameter (extra_pass) to bpf_jit_get_func_addr() is set to 
false. In bpf_jit_get_func_addr(), we have:

	*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
	if (!*func_addr_fixed) {
		/* Place-holder address till the last pass has collected
		 * all addresses for JITed subprograms in which case we
		 * can pick them up from prog->aux.
		 */
		if (!extra_pass)
			addr = NULL;

Before this patch series, in bpf_jit_fixup_addresses(), we were calling 
bpf_jit_get_func_addr() with the third parameter set to true.


- Naveen
Christophe Leroy Jan. 31, 2023, 9:52 a.m. UTC | #4
Le 10/01/2023 à 09:44, Naveen N. Rao a écrit :
> Christophe Leroy wrote:
>>
>>
>> Le 13/12/2022 à 11:23, Naveen N. Rao a écrit :
>>> Christophe Leroy wrote:
>>>> BPF core calls the jit compiler again for an extra pass in order
>>>> to properly set subprog addresses.
>>>>
>>>> Unlike other architectures, powerpc only updates the addresses
>>>> during that extra pass. It means that holes must have been left
>>>> in the code in order to enable the maximum possible instruction
>>>> size.
>>>>
>>>> In order avoid waste of space, and waste of CPU time on powerpc
>>>> processors on which the NOP instruction is not 0-cycle, perform
>>>> two real additional passes.
>>>>
>>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>>> ---
>>>>  arch/powerpc/net/bpf_jit_comp.c | 85 ---------------------------------
>>>>  1 file changed, 85 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/net/bpf_jit_comp.c 
>>>> b/arch/powerpc/net/bpf_jit_comp.c
>>>> index 43e634126514..8833bf23f5aa 100644
>>>> --- a/arch/powerpc/net/bpf_jit_comp.c
>>>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>>>> @@ -23,74 +23,6 @@ static void bpf_jit_fill_ill_insns(void *area, 
>>>> unsigned int size)
>>>>      memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
>>>>  }
>>>>
>>>> -/* Fix updated addresses (for subprog calls, ldimm64, et al) during 
>>>> extra pass */
>>>> -static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
>>>> -                   struct codegen_context *ctx, u32 *addrs)
>>>> -{
>>>> -    const struct bpf_insn *insn = fp->insnsi;
>>>> -    bool func_addr_fixed;
>>>> -    u64 func_addr;
>>>> -    u32 tmp_idx;
>>>> -    int i, j, ret;
>>>> -
>>>> -    for (i = 0; i < fp->len; i++) {
>>>> -        /*
>>>> -         * During the extra pass, only the branch target addresses for
>>>> -         * the subprog calls need to be fixed. All other instructions
>>>> -         * can left untouched.
>>>> -         *
>>>> -         * The JITed image length does not change because we already
>>>> -         * ensure that the JITed instruction sequence for these calls
>>>> -         * are of fixed length by padding them with NOPs.
>>>> -         */
>>>> -        if (insn[i].code == (BPF_JMP | BPF_CALL) &&
>>>> -            insn[i].src_reg == BPF_PSEUDO_CALL) {
>>>> -            ret = bpf_jit_get_func_addr(fp, &insn[i], true,
>>>> -                            &func_addr,
>>>> -                            &func_addr_fixed);
>>>
>>> I don't see you updating calls to bpf_jit_get_func_addr() in 
>>> bpf_jit_build_body() to set extra_pass to true. Afaics, that's 
>>> required to get the correct address to be branched to for subprogs.
>>>
>>
>> I don't understand what you mean.
> 
> I am referring to the third parameter we pass to bpf_jit_get_func_addr().
> 
> In bpf_jit_build_body(), we do:
> 
>          case BPF_JMP | BPF_CALL:
>              ctx->seen |= SEEN_FUNC;
> 
>              ret = bpf_jit_get_func_addr(fp, &insn[i], false,
>                              &func_addr, &func_addr_fixed);
> 
> 
> The third parameter (extra_pass) to bpf_jit_get_func_addr() is set to 
> false. In bpf_jit_get_func_addr(), we have:
> 
>      *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
>      if (!*func_addr_fixed) {
>          /* Place-holder address till the last pass has collected
>           * all addresses for JITed subprograms in which case we
>           * can pick them up from prog->aux.
>           */
>          if (!extra_pass)
>              addr = NULL;
> 
> Before this patch series, in bpf_jit_fixup_addresses(), we were calling 
> bpf_jit_get_func_addr() with the third parameter set to true.

Ah right, I see.

I will send out v2 shortly.

Thanks
Christophe
diff mbox series

Patch

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 43e634126514..8833bf23f5aa 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -23,74 +23,6 @@  static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
 	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
 }
 
-/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
-static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
-				   struct codegen_context *ctx, u32 *addrs)
-{
-	const struct bpf_insn *insn = fp->insnsi;
-	bool func_addr_fixed;
-	u64 func_addr;
-	u32 tmp_idx;
-	int i, j, ret;
-
-	for (i = 0; i < fp->len; i++) {
-		/*
-		 * During the extra pass, only the branch target addresses for
-		 * the subprog calls need to be fixed. All other instructions
-		 * can left untouched.
-		 *
-		 * The JITed image length does not change because we already
-		 * ensure that the JITed instruction sequence for these calls
-		 * are of fixed length by padding them with NOPs.
-		 */
-		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
-		    insn[i].src_reg == BPF_PSEUDO_CALL) {
-			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
-						    &func_addr,
-						    &func_addr_fixed);
-			if (ret < 0)
-				return ret;
-
-			/*
-			 * Save ctx->idx as this would currently point to the
-			 * end of the JITed image and set it to the offset of
-			 * the instruction sequence corresponding to the
-			 * subprog call temporarily.
-			 */
-			tmp_idx = ctx->idx;
-			ctx->idx = addrs[i] / 4;
-			ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr);
-			if (ret)
-				return ret;
-
-			/*
-			 * Restore ctx->idx here. This is safe as the length
-			 * of the JITed sequence remains unchanged.
-			 */
-			ctx->idx = tmp_idx;
-		} else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
-			tmp_idx = ctx->idx;
-			ctx->idx = addrs[i] / 4;
-#ifdef CONFIG_PPC32
-			PPC_LI32(bpf_to_ppc(insn[i].dst_reg) - 1, (u32)insn[i + 1].imm);
-			PPC_LI32(bpf_to_ppc(insn[i].dst_reg), (u32)insn[i].imm);
-			for (j = ctx->idx - addrs[i] / 4; j < 4; j++)
-				EMIT(PPC_RAW_NOP());
-#else
-			func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32);
-			PPC_LI64(bpf_to_ppc(insn[i].dst_reg), func_addr);
-			/* overwrite rest with nops */
-			for (j = ctx->idx - addrs[i] / 4; j < 5; j++)
-				EMIT(PPC_RAW_NOP());
-#endif
-			ctx->idx = tmp_idx;
-			i++;
-		}
-	}
-
-	return 0;
-}
-
 int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
 {
 	if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
@@ -234,22 +166,6 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
 
-	if (extra_pass) {
-		/*
-		 * Do not touch the prologue and epilogue as they will remain
-		 * unchanged. Only fix the branch target address for subprog
-		 * calls in the body, and ldimm64 instructions.
-		 *
-		 * This does not change the offsets and lengths of the subprog
-		 * call instruction sequences and hence, the size of the JITed
-		 * image as well.
-		 */
-		bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs);
-
-		/* There is no need to perform the usual passes. */
-		goto skip_codegen_passes;
-	}
-
 	/* Code generation passes 1-2 */
 	for (pass = 1; pass < 3; pass++) {
 		/* Now build the prologue, body code & epilogue for real. */
@@ -268,7 +184,6 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 				proglen - (cgctx.idx * 4), cgctx.seen);
 	}
 
-skip_codegen_passes:
 	if (bpf_jit_enable > 1)
 		/*
 		 * Note that we output the base address of the code_base