From patchwork Thu Oct 10 17:56:18 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13830611 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 69-171-232-181.mail-mxout.facebook.com (69-171-232-181.mail-mxout.facebook.com [69.171.232.181]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1FBA219D89E for ; Thu, 10 Oct 2024 17:56:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=69.171.232.181 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728582980; cv=none; b=BzYAkXuZ2R0BztNvWwYp7I627Iu/9cMy3/1HwIDMmeUpDCltPQGT+kqAt5qIacDJb1qWnj0SALFYeEdwHn5/ZphszbTgfRhbyzHNSdw5+/N2F2PyBycyh1o7F3ODeuGpfJfLJ1QgGI99MDmcsWZUktZpRQckdTchQB8Euyefm88= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1728582980; c=relaxed/simple; bh=yuLxpvwL7uG7lRCX6678J8lv7oU/X2ADSI5kFDo6wiM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=S0bXohcjK7582w/57R6dzJehMSIlh91nNzqtv9Mxi5H4vPaKT2gHcErHJqIvXelVcRRLqvSmrfHUtj1b5CNhVc3qddz/35hAnSE5XJCkwcw5bkIANBjVkxE5VTBYidc/DMMfvkEOGklS9Z+5X1LGlteun86WhfLulx7/nSgRDwM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=69.171.232.181 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id 25A239F27BD1; Thu, 10 Oct 2024 10:56:18 -0700 (PDT) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau , Tejun Heo Subject: [PATCH bpf-next 
v4 05/10] bpf, x86: Add jit support for private stack Date: Thu, 10 Oct 2024 10:56:18 -0700 Message-ID: <20241010175618.1897998-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241010175552.1895980-1-yonghong.song@linux.dev> References: <20241010175552.1895980-1-yonghong.song@linux.dev> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: bpf@iogearbox.net Add jit support for private stack. For a particular subtree, e.g., subtree_root <== stack depth 120 subprog1 <== stack depth 80 subprog2 <== stack depth 40 subprog3 <== stack depth 160 Let us say that priv_stack_ptr is the memory address allocated for private stack. The frame pointer for each above is calculated like below: subtree_root <== subtree_root_fp = priv_stack_ptr + 120 subprog1 <== subtree_subprog1_fp = subtree_root_fp + 80 subprog2 <== subtree_subprog2_fp = subtree_subprog1_fp + 40 subprog3 <== subtree_subprog3_fp = subtree_root_fp + 160 For any function call to helper/kfunc, push/pop prog frame pointer is needed in order to preserve frame pointer value. To deal with exception handling, push/pop frame pointer is also used surrounding call to subsequent subprog. For example, subtree_root subprog1 ... insn: call bpf_throw ... After jit, we will have subtree_root insn: push r9 subprog1 ... insn: push r9 insn: call bpf_throw insn: pop r9 ... insn: pop r9 exception_handler pop r9 ... where r9 represents the fp for each subprog. 
Signed-off-by: Yonghong Song --- arch/x86/net/bpf_jit_comp.c | 88 ++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index f01fdabf786e..a6ba85cec49a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -325,6 +325,22 @@ struct jit_context { /* Number of bytes that will be skipped on tailcall */ #define X86_TAIL_CALL_OFFSET (12 + ENDBR_INSN_SIZE) +static void push_r9(u8 **pprog) +{ + u8 *prog = *pprog; + + EMIT2(0x41, 0x51); /* push r9 */ + *pprog = prog; +} + +static void pop_r9(u8 **pprog) +{ + u8 *prog = *pprog; + + EMIT2(0x41, 0x59); /* pop r9 */ + *pprog = prog; +} + static void push_r12(u8 **pprog) { u8 *prog = *pprog; @@ -484,13 +500,17 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog) *pprog = prog; } +static void emit_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog, + enum bpf_priv_stack_mode priv_stack_mode); + /* * Emit x86-64 prologue code for BPF program. * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes * while jumping to another program */ static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog, - bool tail_call_reachable) + bool tail_call_reachable, + enum bpf_priv_stack_mode priv_stack_mode) { bool ebpf_from_cbpf = bpf_prog_was_classic(bpf_prog); bool is_exception_cb = bpf_prog->aux->exception_cb; @@ -520,6 +540,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog * first restore those callee-saved regs from stack, before * reusing the stack frame. */ + if (priv_stack_mode != NO_PRIV_STACK) + pop_r9(&prog); pop_callee_regs(&prog, all_callee_regs_used); pop_r12(&prog); /* Reset the stack frame. 
*/ @@ -532,6 +554,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog /* X86_TAIL_CALL_OFFSET is here */ EMIT_ENDBR(); + emit_priv_frame_ptr(&prog, bpf_prog, priv_stack_mode); + /* sub rsp, rounded_stack_depth */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); @@ -1451,6 +1475,42 @@ static void emit_alu_helper_1(u8 **pprog, u8 insn_code, u32 dst_reg, s32 imm32) *pprog = prog; } +static void emit_root_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog, + u32 orig_stack_depth) +{ + void __percpu *priv_frame_ptr; + u8 *prog = *pprog; + + priv_frame_ptr = bpf_prog->aux->priv_stack_ptr + orig_stack_depth; + + /* movabs r9, priv_frame_ptr */ + emit_mov_imm64(&prog, X86_REG_R9, (long) priv_frame_ptr >> 32, + (u32) (long) priv_frame_ptr); +#ifdef CONFIG_SMP + /* add <r9>, gs:[<off>] */ + EMIT2(0x65, 0x4c); + EMIT3(0x03, 0x0c, 0x25); + EMIT((u32)(unsigned long)&this_cpu_off, 4); +#endif + *pprog = prog; +} + +static void emit_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog, + enum bpf_priv_stack_mode priv_stack_mode) +{ + u32 orig_stack_depth = round_up(bpf_prog->aux->stack_depth, 8); + u8 *prog = *pprog; + + if (priv_stack_mode == PRIV_STACK_ROOT_PROG) + emit_root_priv_frame_ptr(&prog, bpf_prog, orig_stack_depth); + else if (priv_stack_mode == PRIV_STACK_SUB_PROG && orig_stack_depth) + /* r9 += orig_stack_depth */ + emit_alu_helper_1(&prog, BPF_ALU64 | BPF_ADD | BPF_K, X86_REG_R9, + orig_stack_depth); + + *pprog = prog; +} + #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) #define __LOAD_TCC_PTR(off) \ @@ -1464,6 +1524,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image { bool tail_call_reachable = bpf_prog->aux->tail_call_reachable; struct bpf_insn *insn = bpf_prog->insnsi; + enum bpf_priv_stack_mode priv_stack_mode; bool callee_regs_used[4] = {}; int insn_cnt = bpf_prog->len; bool seen_exit = false; @@ -1476,13 +1537,17 @@ static int do_jit(struct bpf_prog
*bpf_prog, int *addrs, u8 *image, u8 *rw_image int err; stack_depth = bpf_prog->aux->stack_depth; + priv_stack_mode = bpf_prog->aux->priv_stack_mode; + if (priv_stack_mode != NO_PRIV_STACK) + stack_depth = 0; arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena); user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena); detect_reg_usage(insn, insn_cnt, callee_regs_used); - emit_prologue(&prog, stack_depth, bpf_prog, tail_call_reachable); + emit_prologue(&prog, stack_depth, bpf_prog, tail_call_reachable, + priv_stack_mode); /* Exception callback will clobber callee regs for its own use, and * restore the original callee regs from main prog's stack frame. */ @@ -1521,6 +1586,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image u8 *func; int nops; + if (priv_stack_mode != NO_PRIV_STACK) { + if (src_reg == BPF_REG_FP) + src_reg = X86_REG_R9; + + if (dst_reg == BPF_REG_FP) + dst_reg = X86_REG_R9; + } + switch (insn->code) { /* ALU */ case BPF_ALU | BPF_ADD | BPF_X: @@ -2146,9 +2219,15 @@ st: if (is_imm8(insn->off)) } if (!imm32) return -EINVAL; + if (priv_stack_mode != NO_PRIV_STACK) { + push_r9(&prog); + ip += 2; + } ip += x86_call_depth_emit_accounting(&prog, func, ip); if (emit_call(&prog, func, ip)) return -EINVAL; + if (priv_stack_mode != NO_PRIV_STACK) + pop_r9(&prog); break; } @@ -3572,6 +3651,11 @@ bool bpf_jit_supports_exceptions(void) return IS_ENABLED(CONFIG_UNWINDER_ORC); } +bool bpf_jit_supports_private_stack(void) +{ + return true; +} + void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie) { #if defined(CONFIG_UNWINDER_ORC)