diff mbox series

[bpf-next,v3,2/7] bpf: enable detaching links of struct_ops objects.

Message ID 20240510002942.1253354-3-thinker.li@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Notify user space when a struct_ops object is detached/unregistered | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18

Commit Message

Kui-Feng Lee May 10, 2024, 12:29 a.m. UTC
Implement the detach callback in bpf_link_ops for struct_ops so that user
programs can detach a struct_ops link. The subsystems that struct_ops
objects are registered to can also use this callback to detach the links
being passed to them.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 kernel/bpf/bpf_struct_ops.c | 50 ++++++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 6 deletions(-)

Comments

Martin KaFai Lau May 21, 2024, 1:22 a.m. UTC | #1
On 5/9/24 5:29 PM, Kui-Feng Lee wrote:
> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
> +{
> +	struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
> +	struct bpf_struct_ops_map *st_map;
> +	struct bpf_map *map;
> +
> +	mutex_lock(&update_mutex);
> +
> +	map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
> +	if (!map) {
> +		mutex_unlock(&update_mutex);
> +		return -EINVAL;
> +	}
> +	st_map = container_of(map, struct bpf_struct_ops_map, map);
> +
> +	st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
> +
> +	rcu_assign_pointer(st_link->map, NULL);
> +	/* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
> +	 * bpf_map_inc() in bpf_struct_ops_map_link_update().
> +	 */
> +	bpf_map_put(&st_map->map);
> +
> +	mutex_unlock(&update_mutex);
> +
> +	return 0;
> +}
> +
>   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>   	.dealloc = bpf_struct_ops_map_link_dealloc,
> +	.detach = bpf_struct_ops_map_link_detach,
>   	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>   	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>   	.update_map = bpf_struct_ops_map_link_update,
> @@ -1176,13 +1208,19 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
>   	if (err)
>   		goto err_out;
>   
> +	/* Init link->map before calling reg() in case being detached
> +	 * immediately.
> +	 */

It is not obvious in the patch how this (immediate detach by subsystem after 
reg) may work without race, so I think it is easier to ask.

[ I put back the err_out context at the end ]

> +	RCU_INIT_POINTER(link->map, map);
> +
>   	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
>   	if (err) {
> +		RCU_INIT_POINTER(link->map, NULL);

In the bpf_struct_ops_map_link_detach() above, the update to link->map is 
protected by the update_mutex. Could you explain how the link->map update to 
NULL is safe here without holding the update_mutex?

>   		bpf_link_cleanup(&link_primer);
> +		/* The link has been free by bpf_link_cleanup() */
>   		link = NULL;
>   		goto err_out;
>   	}
> -	RCU_INIT_POINTER(link->map, map);
>   
>   	return bpf_link_settle(&link_primer);
>

err_out:
	bpf_map_put(map);
	kfree(link);
	return err;
}
Kui-Feng Lee May 21, 2024, 7:30 a.m. UTC | #2
On 5/20/24 18:22, Martin KaFai Lau wrote:
> On 5/9/24 5:29 PM, Kui-Feng Lee wrote:
>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
>> +{
>> +    struct bpf_struct_ops_link *st_link = container_of(link, struct 
>> bpf_struct_ops_link, link);
>> +    struct bpf_struct_ops_map *st_map;
>> +    struct bpf_map *map;
>> +
>> +    mutex_lock(&update_mutex);
>> +
>> +    map = rcu_dereference_protected(st_link->map, 
>> lockdep_is_held(&update_mutex));
>> +    if (!map) {
>> +        mutex_unlock(&update_mutex);
>> +        return -EINVAL;
>> +    }
>> +    st_map = container_of(map, struct bpf_struct_ops_map, map);
>> +
>> +    st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
>> +
>> +    rcu_assign_pointer(st_link->map, NULL);
>> +    /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
>> +     * bpf_map_inc() in bpf_struct_ops_map_link_update().
>> +     */
>> +    bpf_map_put(&st_map->map);
>> +
>> +    mutex_unlock(&update_mutex);
>> +
>> +    return 0;
>> +}
>> +
>>   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>>       .dealloc = bpf_struct_ops_map_link_dealloc,
>> +    .detach = bpf_struct_ops_map_link_detach,
>>       .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>>       .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>>       .update_map = bpf_struct_ops_map_link_update,
>> @@ -1176,13 +1208,19 @@ int bpf_struct_ops_link_create(union bpf_attr 
>> *attr)
>>       if (err)
>>           goto err_out;
>> +    /* Init link->map before calling reg() in case being detached
>> +     * immediately.
>> +     */
> 
> It is not obvious in the patch how this (immediate detach by subsystem 
> after reg) may work without race, so I think it is easier to ask.
> 
> [ I put back the err_out context at the end ]
> 
>> +    RCU_INIT_POINTER(link->map, map);
>> +
>>       err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, 
>> &link->link);
>>       if (err) {
>> +        RCU_INIT_POINTER(link->map, NULL);
> 
> In the bpf_struct_ops_map_link_detach() above, the update to link->map 
> is protected by the update_mutex. Could you explain how the link->map 
> update to NULL is safe here without holding the update_mutex?

If err is not zero, it means the subsystem rejects the pair of the
object and the link passing in. So, it has no reasonable to call
bpf_struct_ops_map_link_detach() for this link.

Does it make sense to you?

> 
>>           bpf_link_cleanup(&link_primer);
>> +        /* The link has been free by bpf_link_cleanup() */
>>           link = NULL;
>>           goto err_out;
>>       }

At this point, we don't change the content of the link anymore except
changing link->fd in bpf_link_settle(). So, it should be safe to call
bpf_struct_ops_map_link_detach() from the subsystem.

Should I explain it in a comment if you think it makes sense to you?

>> -    RCU_INIT_POINTER(link->map, map);
>>       return bpf_link_settle(&link_primer);
>>
> 
> err_out:
>      bpf_map_put(map);
>      kfree(link);
>      return err;
> }
> 
>
Martin KaFai Lau May 21, 2024, 6:09 p.m. UTC | #3
On 5/21/24 12:30 AM, Kui-Feng Lee wrote:
> 
> 
> On 5/20/24 18:22, Martin KaFai Lau wrote:
>> On 5/9/24 5:29 PM, Kui-Feng Lee wrote:
>>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
>>> +{
>>> +    struct bpf_struct_ops_link *st_link = container_of(link, struct 
>>> bpf_struct_ops_link, link);
>>> +    struct bpf_struct_ops_map *st_map;
>>> +    struct bpf_map *map;
>>> +
>>> +    mutex_lock(&update_mutex);
>>> +
>>> +    map = rcu_dereference_protected(st_link->map, 
>>> lockdep_is_held(&update_mutex));
>>> +    if (!map) {
>>> +        mutex_unlock(&update_mutex);
>>> +        return -EINVAL;
>>> +    }
>>> +    st_map = container_of(map, struct bpf_struct_ops_map, map);
>>> +
>>> +    st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
>>> +
>>> +    rcu_assign_pointer(st_link->map, NULL);
>>> +    /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
>>> +     * bpf_map_inc() in bpf_struct_ops_map_link_update().
>>> +     */
>>> +    bpf_map_put(&st_map->map);
>>> +
>>> +    mutex_unlock(&update_mutex);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>>>       .dealloc = bpf_struct_ops_map_link_dealloc,
>>> +    .detach = bpf_struct_ops_map_link_detach,
>>>       .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>>>       .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>>>       .update_map = bpf_struct_ops_map_link_update,
>>> @@ -1176,13 +1208,19 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
>>>       if (err)
>>>           goto err_out;
>>> +    /* Init link->map before calling reg() in case being detached
>>> +     * immediately.
>>> +     */
>>
>> It is not obvious in the patch how this (immediate detach by subsystem after 
>> reg) may work without race, so I think it is easier to ask.
>>
>> [ I put back the err_out context at the end ]
>>
>>> +    RCU_INIT_POINTER(link->map, map);
>>> +
>>>       err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
>>>       if (err) {
>>> +        RCU_INIT_POINTER(link->map, NULL);
>>
>> In the bpf_struct_ops_map_link_detach() above, the update to link->map is 
>> protected by the update_mutex. Could you explain how the link->map update to 
>> NULL is safe here without holding the update_mutex?
> 
> If err is not zero, it means the subsystem rejects the pair of the
> object and the link passing in. So, it has no reasonable to call
> bpf_struct_ops_map_link_detach() for this link.
> 
> Does it make sense to you?
> 
>>
>>>           bpf_link_cleanup(&link_primer);
>>> +        /* The link has been free by bpf_link_cleanup() */
>>>           link = NULL;
>>>           goto err_out;
>>>       }
> 
> At this point, we don't change the content of the link anymore except
> changing link->fd in bpf_link_settle(). So, it should be safe to call
> bpf_struct_ops_map_link_detach() from the subsystem.
> 
> Should I explain it in a comment if you think it makes sense to you?

It makes sense and will be useful to get this details in the comment.

> 
>>> -    RCU_INIT_POINTER(link->map, map);
>>>       return bpf_link_settle(&link_primer);
>>>
>>
>> err_out:
>>      bpf_map_put(map);
>>      kfree(link);
>>      return err;
>> }
>>
>>
diff mbox series

Patch

diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 1542dded7489..7e270ee15f6e 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -1057,9 +1057,6 @@  static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
 	st_map = (struct bpf_struct_ops_map *)
 		rcu_dereference_protected(st_link->map, true);
 	if (st_map) {
-		/* st_link->map can be NULL if
-		 * bpf_struct_ops_link_create() fails to register.
-		 */
 		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
 		bpf_map_put(&st_map->map);
 	}
@@ -1075,7 +1072,8 @@  static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
-	seq_printf(seq, "map_id:\t%d\n", map->id);
+	if (map)
+		seq_printf(seq, "map_id:\t%d\n", map->id);
 	rcu_read_unlock();
 }
 
@@ -1088,7 +1086,8 @@  static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
-	info->struct_ops.map_id = map->id;
+	if (map)
+		info->struct_ops.map_id = map->id;
 	rcu_read_unlock();
 	return 0;
 }
@@ -1113,6 +1112,10 @@  static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 	mutex_lock(&update_mutex);
 
 	old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+	if (!old_map) {
+		err = -EINVAL;
+		goto err_out;
+	}
 	if (expected_old_map && old_map != expected_old_map) {
 		err = -EPERM;
 		goto err_out;
@@ -1139,8 +1142,37 @@  static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 	return err;
 }
 
+static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
+{
+	struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
+	struct bpf_struct_ops_map *st_map;
+	struct bpf_map *map;
+
+	mutex_lock(&update_mutex);
+
+	map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+	if (!map) {
+		mutex_unlock(&update_mutex);
+		return -EINVAL;
+	}
+	st_map = container_of(map, struct bpf_struct_ops_map, map);
+
+	st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
+
+	rcu_assign_pointer(st_link->map, NULL);
+	/* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
+	 * bpf_map_inc() in bpf_struct_ops_map_link_update().
+	 */
+	bpf_map_put(&st_map->map);
+
+	mutex_unlock(&update_mutex);
+
+	return 0;
+}
+
 static const struct bpf_link_ops bpf_struct_ops_map_lops = {
 	.dealloc = bpf_struct_ops_map_link_dealloc,
+	.detach = bpf_struct_ops_map_link_detach,
 	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
 	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
 	.update_map = bpf_struct_ops_map_link_update,
@@ -1176,13 +1208,19 @@  int bpf_struct_ops_link_create(union bpf_attr *attr)
 	if (err)
 		goto err_out;
 
+	/* Init link->map before calling reg() in case being detached
+	 * immediately.
+	 */
+	RCU_INIT_POINTER(link->map, map);
+
 	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
 	if (err) {
+		RCU_INIT_POINTER(link->map, NULL);
 		bpf_link_cleanup(&link_primer);
+		/* The link has been free by bpf_link_cleanup() */
 		link = NULL;
 		goto err_out;
 	}
-	RCU_INIT_POINTER(link->map, map);
 
 	return bpf_link_settle(&link_primer);