diff mbox series

[RFC,bpf-next,1/3] bpf: allow maps to hold bpf_delayed_work fields

Message ID 37859ca03aaaba23f60288de044a3a10d52a79b4.1657576063.git.delyank@fb.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series Execution context callbacks | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next
bpf/vmtest-bpf-next-PR fail merge-conflict

Commit Message

Delyan Kratunov July 11, 2022, 9:48 p.m. UTC
Similarly to bpf_timer, bpf_delayed_work represents a callback that will
be executed at a later time, in a different execution context.

Its treatment in maps is practically the same as timers (to a degree
that perhaps calls for refactoring), except releasing the work does not
need to release any resources - we will wait for pending executions in
the program destruction path.

Signed-off-by: Delyan Kratunov <delyank@fb.com>
---
 include/linux/bpf.h            |  9 ++++++++-
 include/linux/btf.h            |  1 +
 include/uapi/linux/bpf.h       |  8 ++++++++
 kernel/bpf/btf.c               | 21 +++++++++++++++++++++
 kernel/bpf/syscall.c           | 24 ++++++++++++++++++++++--
 kernel/bpf/verifier.c          |  9 +++++++++
 tools/include/uapi/linux/bpf.h |  8 ++++++++
 7 files changed, 77 insertions(+), 3 deletions(-)

Comments

Andrii Nakryiko July 14, 2022, 4:23 a.m. UTC | #1
On Mon, Jul 11, 2022 at 2:48 PM Delyan Kratunov <delyank@fb.com> wrote:
>
> Similarly to bpf_timer, bpf_delayed_work represents a callback that will
> be executed at a later time, in a different execution context.
>
> Its treatment in maps is practically the same as timers (to a degree
> that perhaps calls for refactoring), except releasing the work does not
> need to release any resources - we will wait for pending executions in
> the program destruction path.
>
> Signed-off-by: Delyan Kratunov <delyank@fb.com>
> ---
>  include/linux/bpf.h            |  9 ++++++++-
>  include/linux/btf.h            |  1 +
>  include/uapi/linux/bpf.h       |  8 ++++++++
>  kernel/bpf/btf.c               | 21 +++++++++++++++++++++
>  kernel/bpf/syscall.c           | 24 ++++++++++++++++++++++--
>  kernel/bpf/verifier.c          |  9 +++++++++
>  tools/include/uapi/linux/bpf.h |  8 ++++++++
>  7 files changed, 77 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 0edd7d2c0064..ad9d2cfb0411 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -164,7 +164,8 @@ enum {
>         BPF_MAP_VALUE_OFF_MAX = 8,
>         BPF_MAP_OFF_ARR_MAX   = BPF_MAP_VALUE_OFF_MAX +
>                                 1 + /* for bpf_spin_lock */
> -                               1,  /* for bpf_timer */
> +                               1 + /* for bpf_timer */
> +                               1,  /* for bpf_delayed_work */
>  };
>
>  enum bpf_kptr_type {
> @@ -212,6 +213,7 @@ struct bpf_map {
>         int spin_lock_off; /* >=0 valid offset, <0 error */
>         struct bpf_map_value_off *kptr_off_tab;
>         int timer_off; /* >=0 valid offset, <0 error */
> +       int delayed_work_off; /* >=0 valid offset, <0 error */
>         u32 id;
>         int numa_node;
>         u32 btf_key_type_id;
> @@ -256,6 +258,11 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
>         return map->timer_off >= 0;
>  }
>
> +static inline bool map_value_has_delayed_work(const struct bpf_map *map)
> +{
> +       return map->delayed_work_off >= 0;
> +}
> +
>  static inline bool map_value_has_kptrs(const struct bpf_map *map)
>  {
>         return !IS_ERR_OR_NULL(map->kptr_off_tab);
> diff --git a/include/linux/btf.h b/include/linux/btf.h
> index 1bfed7fa0428..2b8f473a6aa0 100644
> --- a/include/linux/btf.h
> +++ b/include/linux/btf.h
> @@ -132,6 +132,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
>                            u32 expected_offset, u32 expected_size);
>  int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
>  int btf_find_timer(const struct btf *btf, const struct btf_type *t);
> +int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t);
>  struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
>                                           const struct btf_type *t);
>  bool btf_type_is_void(const struct btf_type *t);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index e81362891596..d68fc4f472f1 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -6691,6 +6691,14 @@ struct bpf_dynptr {
>         __u64 :64;
>  } __attribute__((aligned(8)));
>
> +struct bpf_delayed_work {
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +} __attribute__((aligned(8)));
> +
>  struct bpf_sysctl {
>         __u32   write;          /* Sysctl is being read (= 0) or written (= 1).
>                                  * Allows 1,2,4-byte read, but no write.
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index f08037c31dd7..e4ab52cc25fe 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -3196,6 +3196,7 @@ enum btf_field_type {
>         BTF_FIELD_SPIN_LOCK,
>         BTF_FIELD_TIMER,
>         BTF_FIELD_KPTR,
> +       BTF_FIELD_DELAYED_WORK,
>  };
>
>  enum {
> @@ -3283,6 +3284,7 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
>                 switch (field_type) {
>                 case BTF_FIELD_SPIN_LOCK:
>                 case BTF_FIELD_TIMER:
> +               case BTF_FIELD_DELAYED_WORK:
>                         ret = btf_find_struct(btf, member_type, off, sz,
>                                               idx < info_cnt ? &info[idx] : &tmp);
>                         if (ret < 0)
> @@ -3333,6 +3335,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
>                 switch (field_type) {
>                 case BTF_FIELD_SPIN_LOCK:
>                 case BTF_FIELD_TIMER:
> +               case BTF_FIELD_DELAYED_WORK:
>                         ret = btf_find_struct(btf, var_type, off, sz,
>                                               idx < info_cnt ? &info[idx] : &tmp);
>                         if (ret < 0)
> @@ -3375,6 +3378,11 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
>                 sz = sizeof(struct bpf_timer);
>                 align = __alignof__(struct bpf_timer);
>                 break;
> +       case BTF_FIELD_DELAYED_WORK:
> +               name = "bpf_delayed_work";
> +               sz = sizeof(struct bpf_delayed_work);
> +               align = __alignof__(struct bpf_delayed_work);
> +               break;
>         case BTF_FIELD_KPTR:
>                 name = NULL;
>                 sz = sizeof(u64);
> @@ -3421,6 +3429,19 @@ int btf_find_timer(const struct btf *btf, const struct btf_type *t)
>         return info.off;
>  }
>
> +int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t)
> +{
> +       struct btf_field_info info;
> +       int ret;
> +
> +       ret = btf_find_field(btf, t, BTF_FIELD_DELAYED_WORK, &info, 1);
> +       if (ret < 0)
> +               return ret;
> +       if (!ret)
> +               return -ENOENT;
> +       return info.off;
> +}
> +
>  struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
>                                           const struct btf_type *t)
>  {
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 7d5af5b99f0d..041972305344 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -914,10 +914,11 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map)
>         bool has_spin_lock = map_value_has_spin_lock(map);
>         bool has_timer = map_value_has_timer(map);
>         bool has_kptrs = map_value_has_kptrs(map);
> +       bool has_delayed_work = map_value_has_delayed_work(map);
>         struct bpf_map_off_arr *off_arr;
>         u32 i;
>
> -       if (!has_spin_lock && !has_timer && !has_kptrs) {
> +       if (!has_spin_lock && !has_timer && !has_kptrs && !has_delayed_work) {
>                 map->off_arr = NULL;
>                 return 0;
>         }
> @@ -953,6 +954,13 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map)
>                 }
>                 off_arr->cnt += tab->nr_off;
>         }
> +       if (has_delayed_work) {
> +               i = off_arr->cnt;
> +
> +               off_arr->field_off[i] = map->delayed_work_off;
> +               off_arr->field_sz[i] = sizeof(struct bpf_delayed_work);
> +               off_arr->cnt++;
> +       }
>
>         if (off_arr->cnt == 1)
>                 return 0;
> @@ -1014,6 +1022,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
>                         return -EOPNOTSUPP;
>         }
>
> +       map->delayed_work_off = btf_find_delayed_work(btf, value_type);
> +       if (map_value_has_delayed_work(map)) {
> +               if (map->map_flags & BPF_F_RDONLY_PROG)
> +                       return -EACCES;
> +               if (map->map_type != BPF_MAP_TYPE_HASH &&
> +                   map->map_type != BPF_MAP_TYPE_LRU_HASH &&
> +                   map->map_type != BPF_MAP_TYPE_ARRAY)
> +                       return -EOPNOTSUPP;
> +       }
> +
>         map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
>         if (map_value_has_kptrs(map)) {
>                 if (!bpf_capable()) {
> @@ -1095,6 +1113,7 @@ static int map_create(union bpf_attr *attr)
>
>         map->spin_lock_off = -EINVAL;
>         map->timer_off = -EINVAL;
> +       map->delayed_work_off = -EINVAL;
>         if (attr->btf_key_type_id || attr->btf_value_type_id ||
>             /* Even the map's value is a kernel's struct,
>              * the bpf_prog.o must have BTF to begin with
> @@ -1863,7 +1882,8 @@ static int map_freeze(const union bpf_attr *attr)
>                 return PTR_ERR(map);
>
>         if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
> -           map_value_has_timer(map) || map_value_has_kptrs(map)) {
> +           map_value_has_timer(map) || map_value_has_kptrs(map) ||
> +           map_value_has_delayed_work(map)) {

not introduced by you, but shouldn't this check also check
map_value_has_spin_lock()?

>                 fdput(f);
>                 return -ENOTSUPP;
>         }

Also check if you need to modify bpf_map_mmap?


> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 2859901ffbe3..9fd311b7a1ff 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -3817,6 +3817,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
>                         return -EACCES;
>                 }
>         }
> +       if (map_value_has_delayed_work(map) && src == ACCESS_DIRECT) {
> +               u32 t = map->delayed_work_off;
> +
> +               if (reg->smin_value + off < t + sizeof(struct bpf_delayed_work) &&
> +                    t < reg->umax_value + off + size) {
> +                       verbose(env, "bpf_delayed_work cannot be accessed directly by load/store regno=%d off=%d\n", regno, off);
> +                       return -EACCES;
> +               }
> +       }
>         if (map_value_has_kptrs(map)) {
>                 struct bpf_map_value_off *tab = map->kptr_off_tab;
>                 int i;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index e81362891596..d68fc4f472f1 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -6691,6 +6691,14 @@ struct bpf_dynptr {
>         __u64 :64;
>  } __attribute__((aligned(8)));
>
> +struct bpf_delayed_work {
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +} __attribute__((aligned(8)));
> +
>  struct bpf_sysctl {
>         __u32   write;          /* Sysctl is being read (= 0) or written (= 1).
>                                  * Allows 1,2,4-byte read, but no write.
> --
> 2.36.1
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0edd7d2c0064..ad9d2cfb0411 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -164,7 +164,8 @@  enum {
 	BPF_MAP_VALUE_OFF_MAX = 8,
 	BPF_MAP_OFF_ARR_MAX   = BPF_MAP_VALUE_OFF_MAX +
 				1 + /* for bpf_spin_lock */
-				1,  /* for bpf_timer */
+				1 + /* for bpf_timer */
+				1,  /* for bpf_delayed_work */
 };
 
 enum bpf_kptr_type {
@@ -212,6 +213,7 @@  struct bpf_map {
 	int spin_lock_off; /* >=0 valid offset, <0 error */
 	struct bpf_map_value_off *kptr_off_tab;
 	int timer_off; /* >=0 valid offset, <0 error */
+	int delayed_work_off; /* >=0 valid offset, <0 error */
 	u32 id;
 	int numa_node;
 	u32 btf_key_type_id;
@@ -256,6 +258,11 @@  static inline bool map_value_has_timer(const struct bpf_map *map)
 	return map->timer_off >= 0;
 }
 
+static inline bool map_value_has_delayed_work(const struct bpf_map *map)
+{
+	return map->delayed_work_off >= 0;
+}
+
 static inline bool map_value_has_kptrs(const struct bpf_map *map)
 {
 	return !IS_ERR_OR_NULL(map->kptr_off_tab);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 1bfed7fa0428..2b8f473a6aa0 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -132,6 +132,7 @@  bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
 			   u32 expected_offset, u32 expected_size);
 int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
 int btf_find_timer(const struct btf *btf, const struct btf_type *t);
+int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t);
 struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
 					  const struct btf_type *t);
 bool btf_type_is_void(const struct btf_type *t);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e81362891596..d68fc4f472f1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6691,6 +6691,14 @@  struct bpf_dynptr {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_delayed_work {
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index f08037c31dd7..e4ab52cc25fe 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3196,6 +3196,7 @@  enum btf_field_type {
 	BTF_FIELD_SPIN_LOCK,
 	BTF_FIELD_TIMER,
 	BTF_FIELD_KPTR,
+	BTF_FIELD_DELAYED_WORK,
 };
 
 enum {
@@ -3283,6 +3284,7 @@  static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
 		switch (field_type) {
 		case BTF_FIELD_SPIN_LOCK:
 		case BTF_FIELD_TIMER:
+		case BTF_FIELD_DELAYED_WORK:
 			ret = btf_find_struct(btf, member_type, off, sz,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3333,6 +3335,7 @@  static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
 		switch (field_type) {
 		case BTF_FIELD_SPIN_LOCK:
 		case BTF_FIELD_TIMER:
+		case BTF_FIELD_DELAYED_WORK:
 			ret = btf_find_struct(btf, var_type, off, sz,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3375,6 +3378,11 @@  static int btf_find_field(const struct btf *btf, const struct btf_type *t,
 		sz = sizeof(struct bpf_timer);
 		align = __alignof__(struct bpf_timer);
 		break;
+	case BTF_FIELD_DELAYED_WORK:
+		name = "bpf_delayed_work";
+		sz = sizeof(struct bpf_delayed_work);
+		align = __alignof__(struct bpf_delayed_work);
+		break;
 	case BTF_FIELD_KPTR:
 		name = NULL;
 		sz = sizeof(u64);
@@ -3421,6 +3429,19 @@  int btf_find_timer(const struct btf *btf, const struct btf_type *t)
 	return info.off;
 }
 
+int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t)
+{
+	struct btf_field_info info;
+	int ret;
+
+	ret = btf_find_field(btf, t, BTF_FIELD_DELAYED_WORK, &info, 1);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -ENOENT;
+	return info.off;
+}
+
 struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
 					  const struct btf_type *t)
 {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7d5af5b99f0d..041972305344 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -914,10 +914,11 @@  static int bpf_map_alloc_off_arr(struct bpf_map *map)
 	bool has_spin_lock = map_value_has_spin_lock(map);
 	bool has_timer = map_value_has_timer(map);
 	bool has_kptrs = map_value_has_kptrs(map);
+	bool has_delayed_work = map_value_has_delayed_work(map);
 	struct bpf_map_off_arr *off_arr;
 	u32 i;
 
-	if (!has_spin_lock && !has_timer && !has_kptrs) {
+	if (!has_spin_lock && !has_timer && !has_kptrs && !has_delayed_work) {
 		map->off_arr = NULL;
 		return 0;
 	}
@@ -953,6 +954,13 @@  static int bpf_map_alloc_off_arr(struct bpf_map *map)
 		}
 		off_arr->cnt += tab->nr_off;
 	}
+	if (has_delayed_work) {
+		i = off_arr->cnt;
+
+		off_arr->field_off[i] = map->delayed_work_off;
+		off_arr->field_sz[i] = sizeof(struct bpf_delayed_work);
+		off_arr->cnt++;
+	}
 
 	if (off_arr->cnt == 1)
 		return 0;
@@ -1014,6 +1022,16 @@  static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 			return -EOPNOTSUPP;
 	}
 
+	map->delayed_work_off = btf_find_delayed_work(btf, value_type);
+	if (map_value_has_delayed_work(map)) {
+		if (map->map_flags & BPF_F_RDONLY_PROG)
+			return -EACCES;
+		if (map->map_type != BPF_MAP_TYPE_HASH &&
+		    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+		    map->map_type != BPF_MAP_TYPE_ARRAY)
+			return -EOPNOTSUPP;
+	}
+
 	map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
 	if (map_value_has_kptrs(map)) {
 		if (!bpf_capable()) {
@@ -1095,6 +1113,7 @@  static int map_create(union bpf_attr *attr)
 
 	map->spin_lock_off = -EINVAL;
 	map->timer_off = -EINVAL;
+	map->delayed_work_off = -EINVAL;
 	if (attr->btf_key_type_id || attr->btf_value_type_id ||
 	    /* Even the map's value is a kernel's struct,
 	     * the bpf_prog.o must have BTF to begin with
@@ -1863,7 +1882,8 @@  static int map_freeze(const union bpf_attr *attr)
 		return PTR_ERR(map);
 
 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
-	    map_value_has_timer(map) || map_value_has_kptrs(map)) {
+	    map_value_has_timer(map) || map_value_has_kptrs(map) ||
+	    map_value_has_delayed_work(map)) {
 		fdput(f);
 		return -ENOTSUPP;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2859901ffbe3..9fd311b7a1ff 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3817,6 +3817,15 @@  static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 			return -EACCES;
 		}
 	}
+	if (map_value_has_delayed_work(map) && src == ACCESS_DIRECT) {
+		u32 t = map->delayed_work_off;
+
+		if (reg->smin_value + off < t + sizeof(struct bpf_delayed_work) &&
+		     t < reg->umax_value + off + size) {
+			verbose(env, "bpf_delayed_work cannot be accessed directly by load/store regno=%d off=%d\n", regno, off);
+			return -EACCES;
+		}
+	}
 	if (map_value_has_kptrs(map)) {
 		struct bpf_map_value_off *tab = map->kptr_off_tab;
 		int i;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e81362891596..d68fc4f472f1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6691,6 +6691,14 @@  struct bpf_dynptr {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_delayed_work {
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.