diff mbox series

[bpf-next,1/6] bpf: add a pointer of the attached link to bpf_struct_ops_map.

Message ID 20240429213609.487820-2-thinker.li@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Notify user space when a struct_ops object is detached/unregisterd | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail merge-conflict
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next-0

Commit Message

Kui-Feng Lee April 29, 2024, 9:36 p.m. UTC
To facilitate the upcoming unregistring struct_ops objects from the systems
consuming objects, a pointer of the attached link is added to allow for
accessing the attached link of a bpf_struct_ops_map directly from the map
itself.

Previously, a st_map could be attached to multiple links. This patch now
enforces only one link attached at most.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 kernel/bpf/bpf_struct_ops.c | 47 ++++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 3 deletions(-)

Comments

Andrii Nakryiko May 1, 2024, 5:01 p.m. UTC | #1
On Mon, Apr 29, 2024 at 2:36 PM Kui-Feng Lee <thinker.li@gmail.com> wrote:
>
> To facilitate the upcoming unregistring struct_ops objects from the systems
> consuming objects, a pointer of the attached link is added to allow for
> accessing the attached link of a bpf_struct_ops_map directly from the map
> itself.
>
> Previously, a st_map could be attached to multiple links. This patch now
> enforces only one link attached at most.

I'd like to avoid this restriction, in principle. We don't enforce
that BPF program should be attached through a single BPF link, so I
don't think we should allow that for maps. Worst case you can keep a
list of attached links.

>
> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
> ---
>  kernel/bpf/bpf_struct_ops.c | 47 ++++++++++++++++++++++++++++++++++---
>  1 file changed, 44 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index 86c7884abaf8..072e3416c987 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -20,6 +20,8 @@ struct bpf_struct_ops_value {
>
>  #define MAX_TRAMP_IMAGE_PAGES 8
>
> +struct bpf_struct_ops_link;
> +
>  struct bpf_struct_ops_map {
>         struct bpf_map map;
>         struct rcu_head rcu;
> @@ -39,6 +41,8 @@ struct bpf_struct_ops_map {
>         void *image_pages[MAX_TRAMP_IMAGE_PAGES];
>         /* The owner moduler's btf. */
>         struct btf *btf;
> +       /* The link is attached by this map. */
> +       struct bpf_struct_ops_link __rcu *attached;
>         /* uvalue->data stores the kernel struct
>          * (e.g. tcp_congestion_ops) that is more useful
>          * to userspace than the kvalue.  For example,
> @@ -1048,6 +1052,22 @@ static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
>                 smp_load_acquire(&st_map->kvalue.common.state) == BPF_STRUCT_OPS_STATE_READY;
>  }
>
> +/* Set the attached link of a map.
> + *
> + * Return the current value of the st_map->attached.
> + */
> +static inline struct bpf_struct_ops_link *map_attached(struct bpf_struct_ops_map *st_map,
> +                                                      struct bpf_struct_ops_link *st_link)
> +{
> +       return unrcu_pointer(cmpxchg(&st_map->attached, NULL, st_link));
> +}
> +
> +/* Reset the attached link of a map */
> +static inline void map_attached_null(struct bpf_struct_ops_map *st_map)
> +{
> +       rcu_assign_pointer(st_map->attached, NULL);
> +}
> +
>  static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
>  {
>         struct bpf_struct_ops_link *st_link;
> @@ -1061,6 +1081,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
>                  * bpf_struct_ops_link_create() fails to register.
>                  */
>                 st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
> +               map_attached_null(st_map);
>                 bpf_map_put(&st_map->map);
>         }
>         kfree(st_link);
> @@ -1125,9 +1146,21 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>                 goto err_out;
>         }
>
> +       if (likely(st_map != old_st_map) && map_attached(st_map, st_link)) {
> +               /* The map is already in use */
> +               err = -EBUSY;
> +               goto err_out;
> +       }
> +
>         err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data);
> -       if (err)
> +       if (err) {
> +               if (st_map != old_st_map)
> +                       map_attached_null(st_map);
>                 goto err_out;
> +       }
> +
> +       if (likely(st_map != old_st_map))
> +               map_attached_null(old_st_map);
>
>         bpf_map_inc(new_map);
>         rcu_assign_pointer(st_link->map, new_map);
> @@ -1172,20 +1205,28 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
>         }
>         bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL);
>
> +       if (map_attached(st_map, link)) {
> +               err = -EBUSY;
> +               goto err_out;
> +       }
> +
>         err = bpf_link_prime(&link->link, &link_primer);
>         if (err)
> -               goto err_out;
> +               goto err_out_attached;
>
>         err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data);
>         if (err) {
>                 bpf_link_cleanup(&link_primer);
> +               /* The link has been free by bpf_link_cleanup() */
>                 link = NULL;
> -               goto err_out;
> +               goto err_out_attached;
>         }
>         RCU_INIT_POINTER(link->map, map);
>
>         return bpf_link_settle(&link_primer);
>
> +err_out_attached:
> +       map_attached_null(st_map);
>  err_out:
>         bpf_map_put(map);
>         kfree(link);
> --
> 2.34.1
>
Kui-Feng Lee May 1, 2024, 10:15 p.m. UTC | #2
On 5/1/24 10:01, Andrii Nakryiko wrote:
> On Mon, Apr 29, 2024 at 2:36 PM Kui-Feng Lee <thinker.li@gmail.com> wrote:
>>
>> To facilitate the upcoming unregistring struct_ops objects from the systems
>> consuming objects, a pointer of the attached link is added to allow for
>> accessing the attached link of a bpf_struct_ops_map directly from the map
>> itself.
>>
>> Previously, a st_map could be attached to multiple links. This patch now
>> enforces only one link attached at most.
> 
> I'd like to avoid this restriction, in principle. We don't enforce
> that BPF program should be attached through a single BPF link, so I
> don't think we should allow that for maps. Worst case you can keep a
> list of attached links.

Agree!

> 
>>
>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>> ---
>>   kernel/bpf/bpf_struct_ops.c | 47 ++++++++++++++++++++++++++++++++++---
>>   1 file changed, 44 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index 86c7884abaf8..072e3416c987 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -20,6 +20,8 @@ struct bpf_struct_ops_value {
>>
>>   #define MAX_TRAMP_IMAGE_PAGES 8
>>
>> +struct bpf_struct_ops_link;
>> +
>>   struct bpf_struct_ops_map {
>>          struct bpf_map map;
>>          struct rcu_head rcu;
>> @@ -39,6 +41,8 @@ struct bpf_struct_ops_map {
>>          void *image_pages[MAX_TRAMP_IMAGE_PAGES];
>>          /* The owner moduler's btf. */
>>          struct btf *btf;
>> +       /* The link is attached by this map. */
>> +       struct bpf_struct_ops_link __rcu *attached;
>>          /* uvalue->data stores the kernel struct
>>           * (e.g. tcp_congestion_ops) that is more useful
>>           * to userspace than the kvalue.  For example,
>> @@ -1048,6 +1052,22 @@ static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
>>                  smp_load_acquire(&st_map->kvalue.common.state) == BPF_STRUCT_OPS_STATE_READY;
>>   }
>>
>> +/* Set the attached link of a map.
>> + *
>> + * Return the current value of the st_map->attached.
>> + */
>> +static inline struct bpf_struct_ops_link *map_attached(struct bpf_struct_ops_map *st_map,
>> +                                                      struct bpf_struct_ops_link *st_link)
>> +{
>> +       return unrcu_pointer(cmpxchg(&st_map->attached, NULL, st_link));
>> +}
>> +
>> +/* Reset the attached link of a map */
>> +static inline void map_attached_null(struct bpf_struct_ops_map *st_map)
>> +{
>> +       rcu_assign_pointer(st_map->attached, NULL);
>> +}
>> +
>>   static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
>>   {
>>          struct bpf_struct_ops_link *st_link;
>> @@ -1061,6 +1081,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
>>                   * bpf_struct_ops_link_create() fails to register.
>>                   */
>>                  st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
>> +               map_attached_null(st_map);
>>                  bpf_map_put(&st_map->map);
>>          }
>>          kfree(st_link);
>> @@ -1125,9 +1146,21 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>>                  goto err_out;
>>          }
>>
>> +       if (likely(st_map != old_st_map) && map_attached(st_map, st_link)) {
>> +               /* The map is already in use */
>> +               err = -EBUSY;
>> +               goto err_out;
>> +       }
>> +
>>          err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data);
>> -       if (err)
>> +       if (err) {
>> +               if (st_map != old_st_map)
>> +                       map_attached_null(st_map);
>>                  goto err_out;
>> +       }
>> +
>> +       if (likely(st_map != old_st_map))
>> +               map_attached_null(old_st_map);
>>
>>          bpf_map_inc(new_map);
>>          rcu_assign_pointer(st_link->map, new_map);
>> @@ -1172,20 +1205,28 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
>>          }
>>          bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL);
>>
>> +       if (map_attached(st_map, link)) {
>> +               err = -EBUSY;
>> +               goto err_out;
>> +       }
>> +
>>          err = bpf_link_prime(&link->link, &link_primer);
>>          if (err)
>> -               goto err_out;
>> +               goto err_out_attached;
>>
>>          err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data);
>>          if (err) {
>>                  bpf_link_cleanup(&link_primer);
>> +               /* The link has been free by bpf_link_cleanup() */
>>                  link = NULL;
>> -               goto err_out;
>> +               goto err_out_attached;
>>          }
>>          RCU_INIT_POINTER(link->map, map);
>>
>>          return bpf_link_settle(&link_primer);
>>
>> +err_out_attached:
>> +       map_attached_null(st_map);
>>   err_out:
>>          bpf_map_put(map);
>>          kfree(link);
>> --
>> 2.34.1
>>
diff mbox series

Patch

diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 86c7884abaf8..072e3416c987 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -20,6 +20,8 @@  struct bpf_struct_ops_value {
 
 #define MAX_TRAMP_IMAGE_PAGES 8
 
+struct bpf_struct_ops_link;
+
 struct bpf_struct_ops_map {
 	struct bpf_map map;
 	struct rcu_head rcu;
@@ -39,6 +41,8 @@  struct bpf_struct_ops_map {
 	void *image_pages[MAX_TRAMP_IMAGE_PAGES];
 	/* The owner moduler's btf. */
 	struct btf *btf;
+	/* The link is attached by this map. */
+	struct bpf_struct_ops_link __rcu *attached;
 	/* uvalue->data stores the kernel struct
 	 * (e.g. tcp_congestion_ops) that is more useful
 	 * to userspace than the kvalue.  For example,
@@ -1048,6 +1052,22 @@  static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
 		smp_load_acquire(&st_map->kvalue.common.state) == BPF_STRUCT_OPS_STATE_READY;
 }
 
+/* Set the attached link of a map.
+ *
+ * Return the current value of the st_map->attached.
+ */
+static inline struct bpf_struct_ops_link *map_attached(struct bpf_struct_ops_map *st_map,
+						       struct bpf_struct_ops_link *st_link)
+{
+	return unrcu_pointer(cmpxchg(&st_map->attached, NULL, st_link));
+}
+
+/* Reset the attached link of a map */
+static inline void map_attached_null(struct bpf_struct_ops_map *st_map)
+{
+	rcu_assign_pointer(st_map->attached, NULL);
+}
+
 static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
 {
 	struct bpf_struct_ops_link *st_link;
@@ -1061,6 +1081,7 @@  static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
 		 * bpf_struct_ops_link_create() fails to register.
 		 */
 		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
+		map_attached_null(st_map);
 		bpf_map_put(&st_map->map);
 	}
 	kfree(st_link);
@@ -1125,9 +1146,21 @@  static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 		goto err_out;
 	}
 
+	if (likely(st_map != old_st_map) && map_attached(st_map, st_link)) {
+		/* The map is already in use */
+		err = -EBUSY;
+		goto err_out;
+	}
+
 	err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data);
-	if (err)
+	if (err) {
+		if (st_map != old_st_map)
+			map_attached_null(st_map);
 		goto err_out;
+	}
+
+	if (likely(st_map != old_st_map))
+		map_attached_null(old_st_map);
 
 	bpf_map_inc(new_map);
 	rcu_assign_pointer(st_link->map, new_map);
@@ -1172,20 +1205,28 @@  int bpf_struct_ops_link_create(union bpf_attr *attr)
 	}
 	bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL);
 
+	if (map_attached(st_map, link)) {
+		err = -EBUSY;
+		goto err_out;
+	}
+
 	err = bpf_link_prime(&link->link, &link_primer);
 	if (err)
-		goto err_out;
+		goto err_out_attached;
 
 	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data);
 	if (err) {
 		bpf_link_cleanup(&link_primer);
+		/* The link has been free by bpf_link_cleanup() */
 		link = NULL;
-		goto err_out;
+		goto err_out_attached;
 	}
 	RCU_INIT_POINTER(link->map, map);
 
 	return bpf_link_settle(&link_primer);
 
+err_out_attached:
+	map_attached_null(st_map);
 err_out:
 	bpf_map_put(map);
 	kfree(link);