diff mbox series

[bpf-next] bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions.

Message ID 20210202053837.95909-1-alexei.starovoitov@gmail.com (mailing list archive)
State Accepted
Commit 4c5de127598e1b725aa3a5e38ac711472566ca60
Delegated to: BPF
Headers show
Series [bpf-next] bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions. | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 13 maintainers not CCed: bp@alien8.de x86@kernel.org songliubraving@fb.com andrii@kernel.org yoshfuji@linux-ipv6.org ast@kernel.org hpa@zytor.com kpsingh@kernel.org mingo@redhat.com tglx@linutronix.de john.fastabend@gmail.com kafai@fb.com yhs@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 3 this patch: 3
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Alexei Starovoitov Feb. 2, 2021, 5:38 a.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

PTR_TO_BTF_ID registers contain either a kernel pointer or NULL.
Emit the NULL check explicitly in the JIT instead of going into
do_user_addr_fault() on a NULL dereference.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

Comments

Song Liu Feb. 3, 2021, 12:56 a.m. UTC | #1
> On Feb 1, 2021, at 9:38 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
> From: Alexei Starovoitov <ast@kernel.org>
> 
> PTR_TO_BTF_ID registers contain either a kernel pointer or NULL.
> Emit the NULL check explicitly in the JIT instead of going into
> do_user_addr_fault() on a NULL dereference.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
> arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++++
> 1 file changed, 19 insertions(+)
> 
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index b7a2911bda77..a3dc3bd154ac 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -930,6 +930,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
> 		u32 dst_reg = insn->dst_reg;
> 		u32 src_reg = insn->src_reg;
> 		u8 b2 = 0, b3 = 0;
> +		u8 *start_of_ldx;
> 		s64 jmp_offset;
> 		u8 jmp_cond;
> 		u8 *func;
> @@ -1278,12 +1279,30 @@ st:			if (is_imm8(insn->off))
> 		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
> 		case BPF_LDX | BPF_MEM | BPF_DW:
> 		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
> +			if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
> +				/* test src_reg, src_reg */
> +				maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */
> +				EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg));
> +				/* jne start_of_ldx */
> +				EMIT2(X86_JNE, 0);
> +				/* xor dst_reg, dst_reg */
> +				emit_mov_imm32(&prog, false, dst_reg, 0);
> +				/* jmp byte_after_ldx */
> +				EMIT2(0xEB, 0);
> +
> +				/* populate jmp_offset for JNE above */
> +				temp[4] = prog - temp - 5 /* sizeof(test + jne) */;

IIUC, this case only happens for i == 1 in the loop? If so, can we use temp[5(?)] 
instead of start_of_ldx?

Thanks,
Song

> +				start_of_ldx = prog;
> +			}
> 			emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
> 			if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
> 				struct exception_table_entry *ex;
> 				u8 *_insn = image + proglen;
> 				s64 delta;
> 
> +				/* populate jmp_offset for JMP above */
> +				start_of_ldx[-1] = prog - start_of_ldx;
> +
> 				if (!bpf_prog->aux->extable)
> 					break;
> 
> -- 
> 2.24.1
>
Alexei Starovoitov Feb. 3, 2021, 2:19 a.m. UTC | #2
On Wed, Feb 03, 2021 at 12:56:39AM +0000, Song Liu wrote:
> 
> 
> > On Feb 1, 2021, at 9:38 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> > 
> > From: Alexei Starovoitov <ast@kernel.org>
> > 
> > > PTR_TO_BTF_ID registers contain either a kernel pointer or NULL.
> > > Emit the NULL check explicitly in the JIT instead of going into
> > > do_user_addr_fault() on a NULL dereference.
> > 
> > Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> > ---
> > arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++++
> > 1 file changed, 19 insertions(+)
> > 
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index b7a2911bda77..a3dc3bd154ac 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -930,6 +930,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
> > 		u32 dst_reg = insn->dst_reg;
> > 		u32 src_reg = insn->src_reg;
> > 		u8 b2 = 0, b3 = 0;
> > +		u8 *start_of_ldx;
> > 		s64 jmp_offset;
> > 		u8 jmp_cond;
> > 		u8 *func;
> > @@ -1278,12 +1279,30 @@ st:			if (is_imm8(insn->off))
> > 		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
> > 		case BPF_LDX | BPF_MEM | BPF_DW:
> > 		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
> > +			if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
> > +				/* test src_reg, src_reg */
> > +				maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */
> > +				EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg));
> > +				/* jne start_of_ldx */
> > +				EMIT2(X86_JNE, 0);
> > +				/* xor dst_reg, dst_reg */
> > +				emit_mov_imm32(&prog, false, dst_reg, 0);
> > +				/* jmp byte_after_ldx */
> > +				EMIT2(0xEB, 0);
> > +
> > +				/* populate jmp_offset for JNE above */
> > +				temp[4] = prog - temp - 5 /* sizeof(test + jne) */;
> 
> IIUC, this case only happens for i == 1 in the loop? If so, can we use temp[5(?)] 
> instead of start_of_ldx?

I don't understand the question, but let me try anyway :)
temp is a buffer for a single instruction.
prog = temp is set for every loop iteration (not only i == 1).
temp[4] is the second byte of the JNE instruction, as the comment says.
temp[5] is the byte after JNE. It's the first byte of XOR.
That XOR is a variable-length instruction.
Hence, while emitting JNE we don't know its target offset and just use 0.
The temp[4] assignment then fills in the actual offset, since by now we know the size
of XOR.
Song Liu Feb. 3, 2021, 6:37 p.m. UTC | #3
> On Feb 2, 2021, at 6:19 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
> On Wed, Feb 03, 2021 at 12:56:39AM +0000, Song Liu wrote:
>> 
>> 
>>> On Feb 1, 2021, at 9:38 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>>> 
>>> From: Alexei Starovoitov <ast@kernel.org>
>>> 
>>> PTR_TO_BTF_ID registers contain either a kernel pointer or NULL.
>>> Emit the NULL check explicitly in the JIT instead of going into
>>> do_user_addr_fault() on a NULL dereference.
>>> 
>>> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
>>> ---
>>> arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++++
>>> 1 file changed, 19 insertions(+)
>>> 
>>> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
>>> index b7a2911bda77..a3dc3bd154ac 100644
>>> --- a/arch/x86/net/bpf_jit_comp.c
>>> +++ b/arch/x86/net/bpf_jit_comp.c
>>> @@ -930,6 +930,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
>>> 		u32 dst_reg = insn->dst_reg;
>>> 		u32 src_reg = insn->src_reg;
>>> 		u8 b2 = 0, b3 = 0;
>>> +		u8 *start_of_ldx;
>>> 		s64 jmp_offset;
>>> 		u8 jmp_cond;
>>> 		u8 *func;
>>> @@ -1278,12 +1279,30 @@ st:			if (is_imm8(insn->off))
>>> 		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
>>> 		case BPF_LDX | BPF_MEM | BPF_DW:
>>> 		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
>>> +			if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
>>> +				/* test src_reg, src_reg */
>>> +				maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */
>>> +				EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg));
>>> +				/* jne start_of_ldx */
>>> +				EMIT2(X86_JNE, 0);
>>> +				/* xor dst_reg, dst_reg */
>>> +				emit_mov_imm32(&prog, false, dst_reg, 0);
>>> +				/* jmp byte_after_ldx */
>>> +				EMIT2(0xEB, 0);
>>> +
>>> +				/* populate jmp_offset for JNE above */
>>> +				temp[4] = prog - temp - 5 /* sizeof(test + jne) */;
>> 
>> IIUC, this case only happens for i == 1 in the loop? If so, can we use temp[5(?)] 
>> instead of start_of_ldx?
> 
> I don't understand the question, but let me try anyway :)
> temp is a buffer for single instruction.
> prog=temp; for every loop iteration (not only i == 1)

Thanks for the explanation. I misunderstood how we use prog in the loop. 

> temp[4] is the second byte of the JNE instruction, as the comment says.
> temp[5] is the byte after JNE. It's the first byte of XOR.
> That XOR is a variable-length instruction.
> Hence, while emitting JNE we don't know its target offset and just use 0.
> The temp[4] assignment then fills in the actual offset, since by now we know the size
> of XOR.

And after reading emit_ldx() more carefully, I agree that introducing 
start_of_ldx would simplify the logic here. 

Acked-by: Song Liu <songliubraving@fb.com>
patchwork-bot+netdevbpf@kernel.org Feb. 4, 2021, 4:10 p.m. UTC | #4
Hello:

This patch was applied to bpf/bpf-next.git (refs/heads/master):

On Mon,  1 Feb 2021 21:38:37 -0800 you wrote:
> From: Alexei Starovoitov <ast@kernel.org>
> 
> PTR_TO_BTF_ID registers contain either a kernel pointer or NULL.
> Emit the NULL check explicitly in the JIT instead of going into
> do_user_addr_fault() on a NULL dereference.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> 
> [...]

Here is the summary with links:
  - [bpf-next] bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions.
    https://git.kernel.org/bpf/bpf-next/c/4c5de127598e

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
diff mbox series

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b7a2911bda77..a3dc3bd154ac 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -930,6 +930,7 @@  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		u32 dst_reg = insn->dst_reg;
 		u32 src_reg = insn->src_reg;
 		u8 b2 = 0, b3 = 0;
+		u8 *start_of_ldx;
 		s64 jmp_offset;
 		u8 jmp_cond;
 		u8 *func;
@@ -1278,12 +1279,30 @@  st:			if (is_imm8(insn->off))
 		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
 		case BPF_LDX | BPF_MEM | BPF_DW:
 		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+			if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+				/* test src_reg, src_reg */
+				maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */
+				EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg));
+				/* jne start_of_ldx */
+				EMIT2(X86_JNE, 0);
+				/* xor dst_reg, dst_reg */
+				emit_mov_imm32(&prog, false, dst_reg, 0);
+				/* jmp byte_after_ldx */
+				EMIT2(0xEB, 0);
+
+				/* populate jmp_offset for JNE above */
+				temp[4] = prog - temp - 5 /* sizeof(test + jne) */;
+				start_of_ldx = prog;
+			}
 			emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 			if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
 				struct exception_table_entry *ex;
 				u8 *_insn = image + proglen;
 				s64 delta;
 
+				/* populate jmp_offset for JMP above */
+				start_of_ldx[-1] = prog - start_of_ldx;
+
 				if (!bpf_prog->aux->extable)
 					break;