diff mbox series

[bpf-next,v4,3/6] bpf/helpers: introduce bpf_timer_set_sleepable_cb() kfunc

Message ID 20240315-hid-bpf-sleepable-v4-3-5658f2540564@kernel.org (mailing list archive)
State New
Headers show
Series sleepable bpf_timer (was: allow HID-BPF to do device IOs) | expand

Commit Message

Benjamin Tissoires March 15, 2024, 2:29 p.m. UTC
In this patch, bpf_timer_set_sleepable_cb() is functionally equivalent
to bpf_timer_set_callback(), to the exception that it enforces
the timer to be started with BPF_F_TIMER_SLEEPABLE.

But given that bpf_timer_set_callback() is a helper when
bpf_timer_set_sleepable_cb() is a kfunc, we need to teach the verifier
about its attached callback.
Marking that callback as sleepable will be done in a separate patch

Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>

---

changes in v4:
- added a new (ignored) argument to the kfunc so that we do not
  need to wlak the stack

new in v3 (split from v2 02/10)
---
 kernel/bpf/helpers.c  | 46 +++++++++++++++++++++++++++++++++++++--
 kernel/bpf/verifier.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 102 insertions(+), 4 deletions(-)

Comments

Eduard Zingerman March 18, 2024, 10:52 p.m. UTC | #1
On Fri, 2024-03-15 at 15:29 +0100, Benjamin Tissoires wrote:

This patch looks good to me, please see two nitpicks below.
Acked-by: Eduard Zingerman <eddyz87@gmail.com>

[...]

> @@ -1350,6 +1358,11 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
>  		goto out;
>  	}
>  
> +	if (t->is_sleepable && !(flags & BPF_F_TIMER_SLEEPABLE)) {
> +		ret = -EINVAL;
> +		goto out;
> +	}

Nit:
the BPF_F_TIMER_ABS and BPF_F_TIMER_CPU_PIN don't affect
sleepable timers, should this check be changed to:
'(t->is_sleepable && flags != BPF_F_TIMER_SLEEPABLE)' ?

[...]

> @@ -12151,6 +12175,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		}
>  	}
>  
> +	if (is_async_callback_calling_kfunc(meta.func_id)) {
> +		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
> +					 set_timer_callback_state);

Nit: still think that this fragment would be better as:

	if (is_bpf_timer_set_sleepable_cb_impl_kfunc(meta.func_id)) {
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_timer_callback_state);

Because of the 'set_timer_callback_state' passed to push_callback_call().

> +		if (err) {
> +			verbose(env, "kfunc %s#%d failed callback verification\n",
> +				func_name, meta.func_id);
> +			return err;
> +		}
> +	}
> +
>  	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
>  	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
>
Benjamin Tissoires March 21, 2024, 3:44 p.m. UTC | #2
On Mon, Mar 18, 2024 at 11:52 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Fri, 2024-03-15 at 15:29 +0100, Benjamin Tissoires wrote:
>
> This patch looks good to me, please see two nitpicks below.
> Acked-by: Eduard Zingerman <eddyz87@gmail.com>

Thanks!

>
> [...]
>
> > @@ -1350,6 +1358,11 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
> >               goto out;
> >       }
> >
> > +     if (t->is_sleepable && !(flags & BPF_F_TIMER_SLEEPABLE)) {
> > +             ret = -EINVAL;
> > +             goto out;
> > +     }
>
> Nit:
> the BPF_F_TIMER_ABS and BPF_F_TIMER_CPU_PIN don't affect
> sleepable timers, should this check be changed to:
> '(t->is_sleepable && flags != BPF_F_TIMER_SLEEPABLE)' ?

Sounds fair enough. Scheduled this for v5

>
> [...]
>
> > @@ -12151,6 +12175,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >               }
> >       }
> >
> > +     if (is_async_callback_calling_kfunc(meta.func_id)) {
> > +             err = push_callback_call(env, insn, insn_idx, meta.subprogno,
> > +                                      set_timer_callback_state);
>
> Nit: still think that this fragment would be better as:
>
>         if (is_bpf_timer_set_sleepable_cb_impl_kfunc(meta.func_id)) {
>                 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
>                                          set_timer_callback_state);
>
> Because of the 'set_timer_callback_state' passed to push_callback_call().

Yeah, sorry I missed that part from the previous reviews.

Fixed in v5

Cheers,
Benjamin

>
> > +             if (err) {
> > +                     verbose(env, "kfunc %s#%d failed callback verification\n",
> > +                             func_name, meta.func_id);
> > +                     return err;
> > +             }
> > +     }
> > +
> >       rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
> >       rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
> >
>
diff mbox series

Patch

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 38de73a9df83..65c07c0df263 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1108,6 +1108,7 @@  struct bpf_hrtimer {
 	void *value;
 	struct rcu_head rcu;
 	spinlock_t sleepable_lock;
+	bool is_sleepable;
 };
 
 /* the actual struct hidden inside uapi struct bpf_timer */
@@ -1273,8 +1274,8 @@  static const struct bpf_func_proto bpf_timer_init_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
-	   struct bpf_prog_aux *, aux)
+static int __bpf_timer_set_callback(struct bpf_timer_kern *timer, void *callback_fn,
+				    struct bpf_prog_aux *aux, bool is_sleepable)
 {
 	struct bpf_prog *prev, *prog = aux->prog;
 	struct bpf_hrtimer *t;
@@ -1314,6 +1315,7 @@  BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callb
 		t->prog = prog;
 	}
 	rcu_assign_pointer(t->callback_fn, callback_fn);
+	t->is_sleepable = is_sleepable;
 out:
 	if (t)
 		spin_unlock(&t->sleepable_lock);
@@ -1321,6 +1323,12 @@  BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callb
 	return ret;
 }
 
+BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
+	   struct bpf_prog_aux *, aux)
+{
+	return __bpf_timer_set_callback(timer, callback_fn, aux, false);
+}
+
 static const struct bpf_func_proto bpf_timer_set_callback_proto = {
 	.func		= bpf_timer_set_callback,
 	.gpl_only	= true,
@@ -1350,6 +1358,11 @@  BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
 		goto out;
 	}
 
+	if (t->is_sleepable && !(flags & BPF_F_TIMER_SLEEPABLE)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	if (flags & BPF_F_TIMER_ABS)
 		mode = HRTIMER_MODE_ABS_SOFT;
 	else
@@ -2627,6 +2640,34 @@  __bpf_kfunc void bpf_throw(u64 cookie)
 	WARN(1, "A call to BPF exception callback should never return\n");
 }
 
+/**
+ * bpf_timer_set_sleepable_cb_impl() - Configure the timer to call %callback_fn
+ * static function in a sleepable context.
+ * @timer: The bpf_timer that needs to be configured
+ * @callback_fn: a static bpf function
+ *
+ * @returns %0 on success. %-EINVAL if %timer was not initialized with
+ * bpf_timer_init() earlier. %-EPERM if %timer is in a map that doesn't
+ * have any user references.
+ * The user space should either hold a file descriptor to a map with timers
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
+ * closed all timers in the map will be cancelled and freed.
+ *
+ * This kfunc is equivalent to %bpf_timer_set_callback except that it tells
+ * the verifier that the target callback is run in a sleepable context.
+ */
+__bpf_kfunc int bpf_timer_set_sleepable_cb_impl(struct bpf_timer_kern *timer,
+						int (callback_fn)(void *map, int *key, struct bpf_timer *timer),
+						void *aux__ign)
+{
+	struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__ign;
+
+	if (!aux)
+		return -EINVAL;
+
+	return __bpf_timer_set_callback(timer, (void *)callback_fn, aux, true);
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
@@ -2703,6 +2744,7 @@  BTF_ID_FLAGS(func, bpf_dynptr_is_null)
 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
 BTF_ID_FLAGS(func, bpf_dynptr_size)
 BTF_ID_FLAGS(func, bpf_dynptr_clone)
+BTF_ID_FLAGS(func, bpf_timer_set_sleepable_cb_impl)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1483ebc0ee73..53f85e114a33 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -501,8 +501,12 @@  static bool is_dynptr_ref_function(enum bpf_func_id func_id)
 }
 
 static bool is_sync_callback_calling_kfunc(u32 btf_id);
+static bool is_async_callback_calling_kfunc(u32 btf_id);
+static bool is_callback_calling_kfunc(u32 btf_id);
 static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
 
+static bool is_bpf_timer_set_sleepable_cb_impl_kfunc(u32 btf_id);
+
 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
 {
 	return func_id == BPF_FUNC_for_each_map_elem ||
@@ -530,7 +534,8 @@  static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
 
 static bool is_async_callback_calling_insn(struct bpf_insn *insn)
 {
-	return bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm);
+	return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
+	       (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
 }
 
 static bool is_may_goto_insn(struct bpf_insn *insn)
@@ -9471,7 +9476,7 @@  static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
 	 */
 	env->subprog_info[subprog].is_cb = true;
 	if (bpf_pseudo_kfunc_call(insn) &&
-	    !is_sync_callback_calling_kfunc(insn->imm)) {
+	    !is_callback_calling_kfunc(insn->imm)) {
 		verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
 			func_id_name(insn->imm), insn->imm);
 		return -EFAULT;
@@ -10981,6 +10986,7 @@  enum special_kfunc_type {
 	KF_bpf_percpu_obj_drop_impl,
 	KF_bpf_throw,
 	KF_bpf_iter_css_task_new,
+	KF_bpf_timer_set_sleepable_cb_impl,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -11007,6 +11013,7 @@  BTF_ID(func, bpf_throw)
 #ifdef CONFIG_CGROUPS
 BTF_ID(func, bpf_iter_css_task_new)
 #endif
+BTF_ID(func, bpf_timer_set_sleepable_cb_impl)
 BTF_SET_END(special_kfunc_set)
 
 BTF_ID_LIST(special_kfunc_list)
@@ -11037,6 +11044,7 @@  BTF_ID(func, bpf_iter_css_task_new)
 #else
 BTF_ID_UNUSED
 #endif
+BTF_ID(func, bpf_timer_set_sleepable_cb_impl)
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -11365,12 +11373,28 @@  static bool is_sync_callback_calling_kfunc(u32 btf_id)
 	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
 }
 
+static bool is_async_callback_calling_kfunc(u32 btf_id)
+{
+	return btf_id == special_kfunc_list[KF_bpf_timer_set_sleepable_cb_impl];
+}
+
 static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
 {
 	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
 	       insn->imm == special_kfunc_list[KF_bpf_throw];
 }
 
+static bool is_bpf_timer_set_sleepable_cb_impl_kfunc(u32 btf_id)
+{
+	return btf_id == special_kfunc_list[KF_bpf_timer_set_sleepable_cb_impl];
+}
+
+static bool is_callback_calling_kfunc(u32 btf_id)
+{
+	return is_sync_callback_calling_kfunc(btf_id) ||
+	       is_async_callback_calling_kfunc(btf_id);
+}
+
 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
 {
 	return is_bpf_rbtree_api_kfunc(btf_id);
@@ -12151,6 +12175,16 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	}
 
+	if (is_async_callback_calling_kfunc(meta.func_id)) {
+		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+					 set_timer_callback_state);
+		if (err) {
+			verbose(env, "kfunc %s#%d failed callback verification\n",
+				func_name, meta.func_id);
+			return err;
+		}
+	}
+
 	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
 	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
 
@@ -19544,6 +19578,28 @@  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
 		*cnt = 1;
+	} else if (is_bpf_timer_set_sleepable_cb_impl_kfunc(desc->func_id)) {
+		/* The verifier will process callback_fn as many times as necessary
+		 * with different maps and the register states prepared by
+		 * set_timer_callback_state will be accurate.
+		 *
+		 * The following use case is valid:
+		 *   map1 is shared by prog1, prog2, prog3.
+		 *   prog1 calls bpf_timer_init for some map1 elements
+		 *   prog2 calls bpf_timer_set_callback for some map1 elements.
+		 *     Those that were not bpf_timer_init-ed will return -EINVAL.
+		 *   prog3 calls bpf_timer_start for some map1 elements.
+		 *     Those that were not both bpf_timer_init-ed and
+		 *     bpf_timer_set_callback-ed will return -EINVAL.
+		 */
+		struct bpf_insn ld_addrs[2] = {
+			BPF_LD_IMM64(BPF_REG_3, (long)env->prog->aux),
+		};
+
+		insn_buf[0] = ld_addrs[0];
+		insn_buf[1] = ld_addrs[1];
+		insn_buf[2] = *insn;
+		*cnt = 3;
 	}
 	return 0;
 }