
[RFC,v8,03/20] bpf: Allow struct_ops prog to return referenced kptr

Message ID 20240510192412.3297104-4-amery.hung@bytedance.com (mailing list archive)
State RFC
Delegated to: BPF
Series bpf qdisc

Checks

Context: netdev/tree_selection
Check: success
Description: Guessing tree name failed - patch did not apply, async

Commit Message

Amery Hung May 10, 2024, 7:23 p.m. UTC
This patch allows a struct_ops program to return a referenced kptr
if the struct_ops member has a pointer return type. To make sure the
pointer returned to the kernel is valid, it needs to be referenced and
must originally come from the kernel. That is, it should be acquired
through kfuncs or struct_ops "ref_acquired" arguments, but not allocated
locally. A null pointer is also allowed. Therefore, the kernel caller
of the struct_ops function consuming the pointer needs to handle a
potential null pointer.
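
For illustration only (not part of this patch), the kernel-side consumer
of such a return value would look roughly like the hypothetical helper
below; the only point is that NULL is an expected outcome and must be
checked before the kernel takes over the reference:

/* Hypothetical kernel-side consumer of a struct_ops op with a pointer
 * return value: the program hands over its reference, and NULL is a
 * valid, expected result.
 */
static struct sk_buff *qdisc_dequeue_via_ops(struct Qdisc *sch)
{
	struct sk_buff *skb;

	skb = sch->ops->dequeue(sch);	/* may run a BPF struct_ops program */
	if (!skb)
		return NULL;		/* empty queue: nothing to transmit */

	/* The kernel now owns the reference carried by skb. */
	return skb;
}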

The first use case will be Qdisc_ops::dequeue, where a qdisc returns a
pointer to the skb to be dequeued.
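
As a rough sketch of the intended BPF-side usage (not part of this
patch), a fifo dequeue op could look like the program below. The acquire
kfunc name bpf_skb_dequeue_head() is hypothetical and only stands in for
whatever kfunc actually hands out a referenced skb, and the struct_ops
map assumes Qdisc_ops is registered as a struct_ops type by this series.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* Hypothetical acquire kfunc: returns a referenced skb, or NULL when
 * the queue is empty. The name is illustrative only.
 */
extern struct sk_buff *bpf_skb_dequeue_head(struct Qdisc *sch) __ksym;

SEC("struct_ops")
struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch)
{
	/* Returning either the referenced kptr or NULL is exactly what
	 * the verifier changes below are meant to accept.
	 */
	return bpf_skb_dequeue_head(sch);
}

SEC(".struct_ops")
struct Qdisc_ops fifo = {
	.dequeue = (void *)bpf_fifo_dequeue,
	.id      = "bpf_fifo",
};

char _license[] SEC("license") = "GPL";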

To achieve this, we first allow a referenced object to leak through the
return if it is in the return register and its type matches the return
type of the function. Then, check_return_code() checks whether the
to-be-returned pointer is valid.

Signed-off-by: Amery Hung <amery.hung@bytedance.com>
---
 kernel/bpf/verifier.c | 50 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 4 deletions(-)

Comments

Amery Hung May 17, 2024, 2:06 a.m. UTC | #1
On Fri, May 10, 2024 at 12:24 PM Amery Hung <ameryhung@gmail.com> wrote:
>
> This patch allows a struct_ops program to return a referenced kptr
> if the struct_ops member has a pointer return type. To make sure the
> pointer returned to the kernel is valid, it needs to be referenced and
> must originally come from the kernel. That is, it should be acquired
> through kfuncs or struct_ops "ref_acquired" arguments, but not allocated
> locally. A null pointer is also allowed. Therefore, the kernel caller
> of the struct_ops function consuming the pointer needs to handle a
> potential null pointer.
>
> The first use case will be Qdisc_ops::dequeue, where a qdisc returns a
> pointer to the skb to be dequeued.
>
> To achieve this, we first allow a referenced object to leak through the
> return if it is in the return register and its type matches the return
> type of the function. Then, check_return_code() checks whether the
> to-be-returned pointer is valid.
>
> Signed-off-by: Amery Hung <amery.hung@bytedance.com>
> ---
>  kernel/bpf/verifier.c | 50 +++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 46 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 06a6edd306fd..2d4a55ead85b 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -10081,16 +10081,36 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
>
>  static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
>  {
> +       enum bpf_prog_type type = resolve_prog_type(env->prog);
> +       u32 regno = exception_exit? BPF_REG_1 : BPF_REG_0;
> +       struct bpf_reg_state *reg = reg_state(env, regno);
>         struct bpf_func_state *state = cur_func(env);
> +       const struct bpf_prog *prog = env->prog;
> +       const struct btf_type *ret_type = NULL;
>         bool refs_lingering = false;
> +       struct btf *btf;
>         int i;
>
>         if (!exception_exit && state->frameno && !state->in_callback_fn)
>                 return 0;
>
> +       if (type == BPF_PROG_TYPE_STRUCT_OPS &&
> +           reg->type & PTR_TO_BTF_ID && reg->ref_obj_id) {
> +               btf = bpf_prog_get_target_btf(prog);
> +               ret_type = btf_type_by_id(btf, prog->aux->attach_func_proto->type);
> +               if (reg->btf_id != ret_type->type) {
> +                       verbose(env, "Return kptr type, struct %s, doesn't match function prototype, struct %s\n",
> +                               btf_type_name(reg->btf, reg->btf_id),
> +                               btf_type_name(btf, ret_type->type));
> +                       return -EINVAL;
> +               }
> +       }
> +
>         for (i = 0; i < state->acquired_refs; i++) {
>                 if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
>                         continue;
> +               if (ret_type && reg->ref_obj_id == state->refs[i].id)
> +                       continue;
>                 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
>                         state->refs[i].id, state->refs[i].insn_idx);
>                 refs_lingering = true;
> @@ -15395,12 +15415,15 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
>         const char *exit_ctx = "At program exit";
>         struct tnum enforce_attach_type_range = tnum_unknown;
>         const struct bpf_prog *prog = env->prog;
> -       struct bpf_reg_state *reg;
> +       struct bpf_reg_state *reg = reg_state(env, regno);
>         struct bpf_retval_range range = retval_range(0, 1);
>         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
>         int err;
>         struct bpf_func_state *frame = env->cur_state->frame[0];
>         const bool is_subprog = frame->subprogno;
> +       struct btf *btf = bpf_prog_get_target_btf(prog);
> +       bool st_ops_ret_is_kptr = false;
> +       const struct btf_type *t;
>
>         /* LSM and struct_ops func-ptr's return type could be "void" */
>         if (!is_subprog || frame->in_exception_callback_fn) {
> @@ -15409,10 +15432,26 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
>                         if (prog->expected_attach_type == BPF_LSM_CGROUP)
>                                 /* See below, can be 0 or 0-1 depending on hook. */
>                                 break;
> -                       fallthrough;
> +                       if (!prog->aux->attach_func_proto->type)
> +                               return 0;
> +                       break;
>                 case BPF_PROG_TYPE_STRUCT_OPS:
>                         if (!prog->aux->attach_func_proto->type)
>                                 return 0;
> +
> +                       t = btf_type_by_id(btf, prog->aux->attach_func_proto->type);
> +                       if (btf_type_is_ptr(t)) {
> +                               /* Allow struct_ops programs to return kptr or null if
> +                                * the return type is a pointer type.
> +                                * check_reference_leak has ensured the returning kptr
> +                                * matches the type of the function prototype and is
> +                                * the only leaking reference. Thus, we can safely return
> +                                * if the pointer is in its unmodified form
> +                                */
> +                               if (reg->type & PTR_TO_BTF_ID)
> +                                       return __check_ptr_off_reg(env, reg, regno, false);
> +                               st_ops_ret_is_kptr = true;
> +                       }
>                         break;
>                 default:
>                         break;
> @@ -15434,8 +15473,6 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
>                 return -EACCES;
>         }
>
> -       reg = cur_regs(env) + regno;
> -
>         if (frame->in_async_callback_fn) {
>                 /* enforce return zero from async callbacks like timer */
>                 exit_ctx = "At async callback return";
> @@ -15522,6 +15559,11 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
>         case BPF_PROG_TYPE_NETFILTER:
>                 range = retval_range(NF_DROP, NF_ACCEPT);
>                 break;
> +       case BPF_PROG_TYPE_STRUCT_OPS:
> +               if (!st_ops_ret_is_kptr)
> +                       return 0;
> +               range = retval_range(0, 0);
> +               break;

Arguments and the return value of helpers and kfuncs, where we transition
between a bpf program and the kernel, can be tagged so that we can do
proper checks. struct_ops shares a similar property in that sense,
but currently lacks the ability to tag the return value.

One point of discussion was that, here, we assume the returned referenced
kptr "may be null" because that's what Qdisc expects. I think it would make
sense to only allow it when the return is explicitly tagged with
MAY_BE_NULL. How about doing so in the stub function name?


>         case BPF_PROG_TYPE_EXT:
>                 /* freplace program can return anything as its return value
>                  * depends on the to-be-replaced kernel func or bpf program.
> --
> 2.20.1
>
Martin KaFai Lau May 17, 2024, 5:30 a.m. UTC | #2
On 5/16/24 7:06 PM, Amery Hung wrote:
> Arguments and the return value of helpers and kfuncs, where we transition
> between a bpf program and the kernel, can be tagged so that we can do
> proper checks. struct_ops shares a similar property in that sense,
> but currently lacks the ability to tag the return value.
> 
> One point of discussion was that, here, we assume the returned referenced
> kptr "may be null" because that's what Qdisc expects.

As the return value of a kernel function, it usually needs to be able to signal
an error. I was thinking "may be null" is actually the common case if it is not
an ERR_PTR.

> I think it would make sense to only allow it when the return is explicitly
> tagged with MAY_BE_NULL. How about doing so in the stub function name?

I think this is something internal, and qdisc is the only case now. The default
is something that could be improved later as a followup and not necessarily a blocker?

But, yeah, if it is obvious how to make the return ptr expectation/tagging
consistent with what __nullable means for arguments, it would be nice. Tagging
the stub function name with something like "__ret_null"? That should work. I
think it will need some plumbing from bpf_struct_ops.c to the verifier here.
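
To make the idea concrete, something along these lines, purely
illustrative since neither the stub nor the suffix convention exists
today:

/* Sketch of the "__ret_null" idea: the subsystem's stub for the op
 * carries the suffix in its name, and bpf_struct_ops.c would parse it
 * when building the member info, much like the existing "__nullable"
 * suffix handling for stub arguments, then tell the verifier that the
 * returned kptr may be NULL.
 */
static struct sk_buff *bpf_qdisc_dequeue__ret_null(struct Qdisc *sch)
{
	return NULL;
}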

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 06a6edd306fd..2d4a55ead85b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -10081,16 +10081,36 @@  record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 
 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
 {
+	enum bpf_prog_type type = resolve_prog_type(env->prog);
+	u32 regno = exception_exit? BPF_REG_1 : BPF_REG_0;
+	struct bpf_reg_state *reg = reg_state(env, regno);
 	struct bpf_func_state *state = cur_func(env);
+	const struct bpf_prog *prog = env->prog;
+	const struct btf_type *ret_type = NULL;
 	bool refs_lingering = false;
+	struct btf *btf;
 	int i;
 
 	if (!exception_exit && state->frameno && !state->in_callback_fn)
 		return 0;
 
+	if (type == BPF_PROG_TYPE_STRUCT_OPS &&
+	    reg->type & PTR_TO_BTF_ID && reg->ref_obj_id) {
+		btf = bpf_prog_get_target_btf(prog);
+		ret_type = btf_type_by_id(btf, prog->aux->attach_func_proto->type);
+		if (reg->btf_id != ret_type->type) {
+			verbose(env, "Return kptr type, struct %s, doesn't match function prototype, struct %s\n",
+				btf_type_name(reg->btf, reg->btf_id),
+				btf_type_name(btf, ret_type->type));
+			return -EINVAL;
+		}
+	}
+
 	for (i = 0; i < state->acquired_refs; i++) {
 		if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
 			continue;
+		if (ret_type && reg->ref_obj_id == state->refs[i].id)
+			continue;
 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
 			state->refs[i].id, state->refs[i].insn_idx);
 		refs_lingering = true;
@@ -15395,12 +15415,15 @@  static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 	const char *exit_ctx = "At program exit";
 	struct tnum enforce_attach_type_range = tnum_unknown;
 	const struct bpf_prog *prog = env->prog;
-	struct bpf_reg_state *reg;
+	struct bpf_reg_state *reg = reg_state(env, regno);
 	struct bpf_retval_range range = retval_range(0, 1);
 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
 	int err;
 	struct bpf_func_state *frame = env->cur_state->frame[0];
 	const bool is_subprog = frame->subprogno;
+	struct btf *btf = bpf_prog_get_target_btf(prog);
+	bool st_ops_ret_is_kptr = false;
+	const struct btf_type *t;
 
 	/* LSM and struct_ops func-ptr's return type could be "void" */
 	if (!is_subprog || frame->in_exception_callback_fn) {
@@ -15409,10 +15432,26 @@  static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 			if (prog->expected_attach_type == BPF_LSM_CGROUP)
 				/* See below, can be 0 or 0-1 depending on hook. */
 				break;
-			fallthrough;
+			if (!prog->aux->attach_func_proto->type)
+				return 0;
+			break;
 		case BPF_PROG_TYPE_STRUCT_OPS:
 			if (!prog->aux->attach_func_proto->type)
 				return 0;
+
+			t = btf_type_by_id(btf, prog->aux->attach_func_proto->type);
+			if (btf_type_is_ptr(t)) {
+				/* Allow struct_ops programs to return kptr or null if
+				 * the return type is a pointer type.
+				 * check_reference_leak has ensured the returning kptr
+				 * matches the type of the function prototype and is
+				 * the only leaking reference. Thus, we can safely return
+				 * if the pointer is in its unmodified form
+				 */
+				if (reg->type & PTR_TO_BTF_ID)
+					return __check_ptr_off_reg(env, reg, regno, false);
+				st_ops_ret_is_kptr = true;
+			}
 			break;
 		default:
 			break;
@@ -15434,8 +15473,6 @@  static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 		return -EACCES;
 	}
 
-	reg = cur_regs(env) + regno;
-
 	if (frame->in_async_callback_fn) {
 		/* enforce return zero from async callbacks like timer */
 		exit_ctx = "At async callback return";
@@ -15522,6 +15559,11 @@  static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 	case BPF_PROG_TYPE_NETFILTER:
 		range = retval_range(NF_DROP, NF_ACCEPT);
 		break;
+	case BPF_PROG_TYPE_STRUCT_OPS:
+		if (!st_ops_ret_is_kptr)
+			return 0;
+		range = retval_range(0, 0);
+		break;
 	case BPF_PROG_TYPE_EXT:
 		/* freplace program can return anything as its return value
 		 * depends on the to-be-replaced kernel func or bpf program.