[bpf-next,v2,2/6] bpf: enable detaching links of struct_ops objects.

Message ID 20240507055600.2382627-3-thinker.li@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Series Notify user space when a struct_ops object is detached/unregistered

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat

Commit Message

Kui-Feng Lee May 7, 2024, 5:55 a.m. UTC
Implement the detach callback in bpf_link_ops for struct_ops. The
subsystems that struct_ops objects are registered to can use this callback
to detach the links being passed to them.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 kernel/bpf/bpf_struct_ops.c | 50 ++++++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 6 deletions(-)

Comments

Martin KaFai Lau May 8, 2024, 11:22 p.m. UTC | #1
On 5/6/24 10:55 PM, Kui-Feng Lee wrote:
> Implement the detach callback in bpf_link_ops for struct_ops. The
> subsystems that struct_ops objects are registered to can use this callback
> to detach the links being passed to them.

The user space can also use the detach. The subsystem is merely reusing the 
similar detach callback if it stores the link during reg().
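
For example, once the .detach op is in place, user space can tear the
struct_ops link down with BPF_LINK_DETACH on the link fd. A rough,
untested sketch (the "dummy_st_ops"/"dummy_ops" names are made up, and
the map is assumed to be defined under SEC(".struct_ops.link") so that
attaching creates a real bpf_link):

	#include <errno.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>

	static int detach_dummy_ops(struct dummy_st_ops *skel)
	{
		struct bpf_link *link;
		int err;

		/* attach goes through bpf_struct_ops_link_create() -> reg() */
		link = bpf_map__attach_struct_ops(skel->maps.dummy_ops);
		if (!link)
			return -errno;

		/* BPF_LINK_DETACH ends up in bpf_struct_ops_map_link_detach(),
		 * which calls unreg() and drops the link's ref on the map.
		 */
		err = bpf_link_detach(bpf_link__fd(link));

		bpf_link__destroy(link);
		return err;
	}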

> 
> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
> ---
>   kernel/bpf/bpf_struct_ops.c | 50 ++++++++++++++++++++++++++++++++-----
>   1 file changed, 44 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index 390f8c155135..bd2602982e4d 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -1057,9 +1057,6 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
>   	st_map = (struct bpf_struct_ops_map *)
>   		rcu_dereference_protected(st_link->map, true);
>   	if (st_map) {
> -		/* st_link->map can be NULL if
> -		 * bpf_struct_ops_link_create() fails to register.
> -		 */
>   		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, st_link);
>   		bpf_map_put(&st_map->map);
>   	}
> @@ -1075,7 +1072,8 @@ static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
>   	st_link = container_of(link, struct bpf_struct_ops_link, link);
>   	rcu_read_lock();
>   	map = rcu_dereference(st_link->map);
> -	seq_printf(seq, "map_id:\t%d\n", map->id);
> +	if (map)
> +		seq_printf(seq, "map_id:\t%d\n", map->id);
>   	rcu_read_unlock();
>   }
>   
> @@ -1088,7 +1086,8 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
>   	st_link = container_of(link, struct bpf_struct_ops_link, link);
>   	rcu_read_lock();
>   	map = rcu_dereference(st_link->map);
> -	info->struct_ops.map_id = map->id;
> +	if (map)
> +		info->struct_ops.map_id = map->id;
>   	rcu_read_unlock();
>   	return 0;
>   }
> @@ -1113,6 +1112,10 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>   	mutex_lock(&update_mutex);
>   
>   	old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
> +	if (!old_map) {
> +		err = -EINVAL;
> +		goto err_out;
> +	}
>   	if (expected_old_map && old_map != expected_old_map) {
>   		err = -EPERM;
>   		goto err_out;
> @@ -1139,8 +1142,37 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>   	return err;
>   }
>   
> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
> +{
> +	struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
> +	struct bpf_struct_ops_map *st_map;
> +	struct bpf_map *map;
> +
> +	mutex_lock(&update_mutex);
> +
> +	map = rcu_dereference_protected(st_link->map, true);

nit. s/true/lockdep_is_held(&update_mutex)/
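
i.e. something like:

	map = rcu_dereference_protected(st_link->map,
					lockdep_is_held(&update_mutex));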

> +	if (!map) {
> +		mutex_unlock(&update_mutex);
> +		return -EINVAL;
> +	}
> +	st_map = container_of(map, struct bpf_struct_ops_map, map);
> +
> +	st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
> +
> +	rcu_assign_pointer(st_link->map, NULL);
> +	/* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
> +	 * bpf_map_inc() in bpf_struct_ops_map_link_update().
> +	 */
> +	bpf_map_put(&st_map->map);
> +
> +	mutex_unlock(&update_mutex);
> +
> +	return 0;
> +}
> +
>   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>   	.dealloc = bpf_struct_ops_map_link_dealloc,
> +	.detach = bpf_struct_ops_map_link_detach,
>   	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>   	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>   	.update_map = bpf_struct_ops_map_link_update,
> @@ -1176,13 +1208,19 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
>   	if (err)
>   		goto err_out;
>   
> +	/* Init link->map before calling reg() in case being detached
> +	 * immediately.
> +	 */
> +	RCU_INIT_POINTER(link->map, map);
> +
>   	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
>   	if (err) {
> +		rcu_assign_pointer(link->map, NULL);

nit. RCU_INIT_POINTER(link->map, NULL) is fine.
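
Since the pointer is only being set back to NULL here, there is no newly
published data for readers to order against, so plain

	RCU_INIT_POINTER(link->map, NULL);
	bpf_link_cleanup(&link_primer);

is enough.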

There is a merge conflict with patch 4 also.

pw-bot: cr

>   		bpf_link_cleanup(&link_primer);
> +		/* The link has been free by bpf_link_cleanup() */
>   		link = NULL;
>   		goto err_out;
>   	}
> -	RCU_INIT_POINTER(link->map, map);
>   
>   	return bpf_link_settle(&link_primer);
>
Kui-Feng Lee May 9, 2024, 12:14 a.m. UTC | #2
On 5/8/24 16:22, Martin KaFai Lau wrote:
> On 5/6/24 10:55 PM, Kui-Feng Lee wrote:
>> Implement the detach callback in bpf_link_ops for struct_ops. The
>> subsystems that struct_ops objects are registered to can use this 
>> callback
>> to detach the links being passed to them.
> 
> The user space can also use the detach. The subsystem is merely reusing 
> the similar detach callback if it stores the link during reg().

Sure!

> 
>>
>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>> ---
>>   kernel/bpf/bpf_struct_ops.c | 50 ++++++++++++++++++++++++++++++++-----
>>   1 file changed, 44 insertions(+), 6 deletions(-)
>>
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index 390f8c155135..bd2602982e4d 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -1057,9 +1057,6 @@ static void 
>> bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
>>       st_map = (struct bpf_struct_ops_map *)
>>           rcu_dereference_protected(st_link->map, true);
>>       if (st_map) {
>> -        /* st_link->map can be NULL if
>> -         * bpf_struct_ops_link_create() fails to register.
>> -         */
>>           st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, 
>> st_link);
>>           bpf_map_put(&st_map->map);
>>       }
>> @@ -1075,7 +1072,8 @@ static void 
>> bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
>>       st_link = container_of(link, struct bpf_struct_ops_link, link);
>>       rcu_read_lock();
>>       map = rcu_dereference(st_link->map);
>> -    seq_printf(seq, "map_id:\t%d\n", map->id);
>> +    if (map)
>> +        seq_printf(seq, "map_id:\t%d\n", map->id);
>>       rcu_read_unlock();
>>   }
>> @@ -1088,7 +1086,8 @@ static int 
>> bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
>>       st_link = container_of(link, struct bpf_struct_ops_link, link);
>>       rcu_read_lock();
>>       map = rcu_dereference(st_link->map);
>> -    info->struct_ops.map_id = map->id;
>> +    if (map)
>> +        info->struct_ops.map_id = map->id;
>>       rcu_read_unlock();
>>       return 0;
>>   }
>> @@ -1113,6 +1112,10 @@ static int 
>> bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>>       mutex_lock(&update_mutex);
>>       old_map = rcu_dereference_protected(st_link->map, 
>> lockdep_is_held(&update_mutex));
>> +    if (!old_map) {
>> +        err = -EINVAL;
>> +        goto err_out;
>> +    }
>>       if (expected_old_map && old_map != expected_old_map) {
>>           err = -EPERM;
>>           goto err_out;
>> @@ -1139,8 +1142,37 @@ static int 
>> bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>>       return err;
>>   }
>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
>> +{
>> +    struct bpf_struct_ops_link *st_link = container_of(link, struct 
>> bpf_struct_ops_link, link);
>> +    struct bpf_struct_ops_map *st_map;
>> +    struct bpf_map *map;
>> +
>> +    mutex_lock(&update_mutex);
>> +
>> +    map = rcu_dereference_protected(st_link->map, true);
> 
> nit. s/true/lockdep_is_held(&update_mutex)/


I thought it was protected by the refcount held by the caller.
WDYT?


> 
>> +    if (!map) {
>> +        mutex_unlock(&update_mutex);
>> +        return -EINVAL;
>> +    }
>> +    st_map = container_of(map, struct bpf_struct_ops_map, map);
>> +
>> +    st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
>> +
>> +    rcu_assign_pointer(st_link->map, NULL);
>> +    /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
>> +     * bpf_map_inc() in bpf_struct_ops_map_link_update().
>> +     */
>> +    bpf_map_put(&st_map->map);
>> +
>> +    mutex_unlock(&update_mutex);
>> +
>> +    return 0;
>> +}
>> +
>>   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>>       .dealloc = bpf_struct_ops_map_link_dealloc,
>> +    .detach = bpf_struct_ops_map_link_detach,
>>       .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>>       .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>>       .update_map = bpf_struct_ops_map_link_update,
>> @@ -1176,13 +1208,19 @@ int bpf_struct_ops_link_create(union bpf_attr 
>> *attr)
>>       if (err)
>>           goto err_out;
>> +    /* Init link->map before calling reg() in case being detached
>> +     * immediately.
>> +     */
>> +    RCU_INIT_POINTER(link->map, map);
>> +
>>       err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, 
>> &link->link);
>>       if (err) {
>> +        rcu_assign_pointer(link->map, NULL);
> 
> nit. RCU_INIT_POINTER(link->map, NULL) is fine.

Got it!

> 
> There is a merge conflict with patch 4 also.

What do you mean here? Do you mean that patch 4 cannot be applied on top
of patch 2?

> 
> pw-bot: cr
> 
>>           bpf_link_cleanup(&link_primer);
>> +        /* The link has been free by bpf_link_cleanup() */
>>           link = NULL;
>>           goto err_out;
>>       }
>> -    RCU_INIT_POINTER(link->map, map);
>>       return bpf_link_settle(&link_primer);
>
Martin KaFai Lau May 9, 2024, 12:36 a.m. UTC | #3
On 5/8/24 5:14 PM, Kui-Feng Lee wrote:
> 
> 
> On 5/8/24 16:22, Martin KaFai Lau wrote:
>> On 5/6/24 10:55 PM, Kui-Feng Lee wrote:
>>> Implement the detach callback in bpf_link_ops for struct_ops. The
>>> subsystems that struct_ops objects are registered to can use this callback
>>> to detach the links being passed to them.
>>
>> The user space can also use the detach. The subsystem is merely reusing the 
>> similar detach callback if it stores the link during reg().
> 
> Sure!
> 
>>
>>>
>>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>>> ---
>>>   kernel/bpf/bpf_struct_ops.c | 50 ++++++++++++++++++++++++++++++++-----
>>>   1 file changed, 44 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>>> index 390f8c155135..bd2602982e4d 100644
>>> --- a/kernel/bpf/bpf_struct_ops.c
>>> +++ b/kernel/bpf/bpf_struct_ops.c
>>> @@ -1057,9 +1057,6 @@ static void bpf_struct_ops_map_link_dealloc(struct 
>>> bpf_link *link)
>>>       st_map = (struct bpf_struct_ops_map *)
>>>           rcu_dereference_protected(st_link->map, true);
>>>       if (st_map) {
>>> -        /* st_link->map can be NULL if
>>> -         * bpf_struct_ops_link_create() fails to register.
>>> -         */
>>>           st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, st_link);
>>>           bpf_map_put(&st_map->map);
>>>       }
>>> @@ -1075,7 +1072,8 @@ static void bpf_struct_ops_map_link_show_fdinfo(const 
>>> struct bpf_link *link,
>>>       st_link = container_of(link, struct bpf_struct_ops_link, link);
>>>       rcu_read_lock();
>>>       map = rcu_dereference(st_link->map);
>>> -    seq_printf(seq, "map_id:\t%d\n", map->id);
>>> +    if (map)
>>> +        seq_printf(seq, "map_id:\t%d\n", map->id);
>>>       rcu_read_unlock();
>>>   }
>>> @@ -1088,7 +1086,8 @@ static int bpf_struct_ops_map_link_fill_link_info(const 
>>> struct bpf_link *link,
>>>       st_link = container_of(link, struct bpf_struct_ops_link, link);
>>>       rcu_read_lock();
>>>       map = rcu_dereference(st_link->map);
>>> -    info->struct_ops.map_id = map->id;
>>> +    if (map)
>>> +        info->struct_ops.map_id = map->id;
>>>       rcu_read_unlock();
>>>       return 0;
>>>   }
>>> @@ -1113,6 +1112,10 @@ static int bpf_struct_ops_map_link_update(struct 
>>> bpf_link *link, struct bpf_map
>>>       mutex_lock(&update_mutex);
>>>       old_map = rcu_dereference_protected(st_link->map, 
>>> lockdep_is_held(&update_mutex));
>>> +    if (!old_map) {
>>> +        err = -EINVAL;
>>> +        goto err_out;
>>> +    }
>>>       if (expected_old_map && old_map != expected_old_map) {
>>>           err = -EPERM;
>>>           goto err_out;
>>> @@ -1139,8 +1142,37 @@ static int bpf_struct_ops_map_link_update(struct 
>>> bpf_link *link, struct bpf_map
>>>       return err;
>>>   }
>>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
>>> +{
>>> +    struct bpf_struct_ops_link *st_link = container_of(link, struct 
>>> bpf_struct_ops_link, link);
>>> +    struct bpf_struct_ops_map *st_map;
>>> +    struct bpf_map *map;
>>> +
>>> +    mutex_lock(&update_mutex);
>>> +
>>> +    map = rcu_dereference_protected(st_link->map, true);
>>
>> nit. s/true/lockdep_is_held(&update_mutex)/
> 
> 
> I thought it is protected by the refcount holding by the caller.
> WDYT?

st_link->map is the one with __rcu tag and "!map" is tested next. I don't see 
how these imply the map pointer is protected by refcount. Can you explain?

> 
> 
>>
>>> +    if (!map) {
>>> +        mutex_unlock(&update_mutex);
>>> +        return -EINVAL;
>>> +    }
>>> +    st_map = container_of(map, struct bpf_struct_ops_map, map);
>>> +
>>> +    st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
>>> +
>>> +    rcu_assign_pointer(st_link->map, NULL);
>>> +    /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
>>> +     * bpf_map_inc() in bpf_struct_ops_map_link_update().
>>> +     */
>>> +    bpf_map_put(&st_map->map);
>>> +
>>> +    mutex_unlock(&update_mutex);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>>>       .dealloc = bpf_struct_ops_map_link_dealloc,
>>> +    .detach = bpf_struct_ops_map_link_detach,
>>>       .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>>>       .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>>>       .update_map = bpf_struct_ops_map_link_update,
>>> @@ -1176,13 +1208,19 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
>>>       if (err)
>>>           goto err_out;
>>> +    /* Init link->map before calling reg() in case being detached
>>> +     * immediately.
>>> +     */
>>> +    RCU_INIT_POINTER(link->map, map);
>>> +
>>>       err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
>>>       if (err) {
>>> +        rcu_assign_pointer(link->map, NULL);
>>
>> nit. RCU_INIT_POINTER(link->map, NULL) is fine.
> 
> Got it!
> 
>>
>> There is a merge conflict with patch 4 also.
> 
> What do you mean here? Do you mean the patch 4 can not be applied on top
> of the patch 2?

Please monitor the bpf CI report.

bpf CI complains: 
https://patchwork.kernel.org/project/netdevbpf/patch/20240507055600.2382627-2-thinker.li@gmail.com/

snippet of the error:

Applying: bpf: enable detaching links of struct_ops objects.
Applying: bpf: support epoll from bpf struct_ops links.
Applying: selftests/bpf: test struct_ops with epoll
Patch failed at 0004 selftests/bpf: test struct_ops with epoll

> 
>>
>> pw-bot: cr
>>
>>>           bpf_link_cleanup(&link_primer);
>>> +        /* The link has been free by bpf_link_cleanup() */
>>>           link = NULL;
>>>           goto err_out;
>>>       }
>>> -    RCU_INIT_POINTER(link->map, map);
>>>       return bpf_link_settle(&link_primer);
>>
Kui-Feng Lee May 9, 2024, 12:46 a.m. UTC | #4
On 5/8/24 17:14, Kui-Feng Lee wrote:
> 
> 
> On 5/8/24 16:22, Martin KaFai Lau wrote:
>> On 5/6/24 10:55 PM, Kui-Feng Lee wrote:
>>> Implement the detach callback in bpf_link_ops for struct_ops. The
>>> subsystems that struct_ops objects are registered to can use this 
>>> callback
>>> to detach the links being passed to them.
>>
>> The user space can also use the detach. The subsystem is merely 
>> reusing the similar detach callback if it stores the link during reg().
> 
> Sure!
> 
>>
>>>
>>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>>> ---
>>>   kernel/bpf/bpf_struct_ops.c | 50 ++++++++++++++++++++++++++++++++-----
>>>   1 file changed, 44 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>>> index 390f8c155135..bd2602982e4d 100644
>>> --- a/kernel/bpf/bpf_struct_ops.c
>>> +++ b/kernel/bpf/bpf_struct_ops.c
>>> @@ -1057,9 +1057,6 @@ static void 
>>> bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
>>>       st_map = (struct bpf_struct_ops_map *)
>>>           rcu_dereference_protected(st_link->map, true);
>>>       if (st_map) {
>>> -        /* st_link->map can be NULL if
>>> -         * bpf_struct_ops_link_create() fails to register.
>>> -         */
>>>           st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, 
>>> st_link);
>>>           bpf_map_put(&st_map->map);
>>>       }
>>> @@ -1075,7 +1072,8 @@ static void 
>>> bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
>>>       st_link = container_of(link, struct bpf_struct_ops_link, link);
>>>       rcu_read_lock();
>>>       map = rcu_dereference(st_link->map);
>>> -    seq_printf(seq, "map_id:\t%d\n", map->id);
>>> +    if (map)
>>> +        seq_printf(seq, "map_id:\t%d\n", map->id);
>>>       rcu_read_unlock();
>>>   }
>>> @@ -1088,7 +1086,8 @@ static int 
>>> bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
>>>       st_link = container_of(link, struct bpf_struct_ops_link, link);
>>>       rcu_read_lock();
>>>       map = rcu_dereference(st_link->map);
>>> -    info->struct_ops.map_id = map->id;
>>> +    if (map)
>>> +        info->struct_ops.map_id = map->id;
>>>       rcu_read_unlock();
>>>       return 0;
>>>   }
>>> @@ -1113,6 +1112,10 @@ static int 
>>> bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>>>       mutex_lock(&update_mutex);
>>>       old_map = rcu_dereference_protected(st_link->map, 
>>> lockdep_is_held(&update_mutex));
>>> +    if (!old_map) {
>>> +        err = -EINVAL;
>>> +        goto err_out;
>>> +    }
>>>       if (expected_old_map && old_map != expected_old_map) {
>>>           err = -EPERM;
>>>           goto err_out;
>>> @@ -1139,8 +1142,37 @@ static int 
>>> bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>>>       return err;
>>>   }
>>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
>>> +{
>>> +    struct bpf_struct_ops_link *st_link = container_of(link, struct 
>>> bpf_struct_ops_link, link);
>>> +    struct bpf_struct_ops_map *st_map;
>>> +    struct bpf_map *map;
>>> +
>>> +    mutex_lock(&update_mutex);
>>> +
>>> +    map = rcu_dereference_protected(st_link->map, true);
>>
>> nit. s/true/lockdep_is_held(&update_mutex)/
> 
> 
> I thought it is protected by the refcount holding by the caller.
> WDYT?
> 
> 
>>
>>> +    if (!map) {
>>> +        mutex_unlock(&update_mutex);
>>> +        return -EINVAL;
>>> +    }
>>> +    st_map = container_of(map, struct bpf_struct_ops_map, map);
>>> +
>>> +    st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
>>> +
>>> +    rcu_assign_pointer(st_link->map, NULL);
>>> +    /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
>>> +     * bpf_map_inc() in bpf_struct_ops_map_link_update().
>>> +     */
>>> +    bpf_map_put(&st_map->map);
>>> +
>>> +    mutex_unlock(&update_mutex);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>>>       .dealloc = bpf_struct_ops_map_link_dealloc,
>>> +    .detach = bpf_struct_ops_map_link_detach,
>>>       .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>>>       .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>>>       .update_map = bpf_struct_ops_map_link_update,
>>> @@ -1176,13 +1208,19 @@ int bpf_struct_ops_link_create(union bpf_attr 
>>> *attr)
>>>       if (err)
>>>           goto err_out;
>>> +    /* Init link->map before calling reg() in case being detached
>>> +     * immediately.
>>> +     */
>>> +    RCU_INIT_POINTER(link->map, map);
>>> +
>>>       err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, 
>>> &link->link);
>>>       if (err) {
>>> +        rcu_assign_pointer(link->map, NULL);
>>
>> nit. RCU_INIT_POINTER(link->map, NULL) is fine.
> 
> Got it!
> 
>>
>> There is a merge conflict with patch 4 also.
> 
> What do you mean here? Do you mean the patch 4 can not be applied on top
> of the patch 2?

I saw it after rebasing my local repository.

> 
>>
>> pw-bot: cr
>>
>>>           bpf_link_cleanup(&link_primer);
>>> +        /* The link has been free by bpf_link_cleanup() */
>>>           link = NULL;
>>>           goto err_out;
>>>       }
>>> -    RCU_INIT_POINTER(link->map, map);
>>>       return bpf_link_settle(&link_primer);
>>
Kui-Feng Lee May 9, 2024, 4:59 p.m. UTC | #5
On 5/8/24 17:36, Martin KaFai Lau wrote:
> On 5/8/24 5:14 PM, Kui-Feng Lee wrote:
>>
>>
>> On 5/8/24 16:22, Martin KaFai Lau wrote:
>>> On 5/6/24 10:55 PM, Kui-Feng Lee wrote:
>>>> Implement the detach callback in bpf_link_ops for struct_ops. The
>>>> subsystems that struct_ops objects are registered to can use this 
>>>> callback
>>>> to detach the links being passed to them.
>>>
>>> The user space can also use the detach. The subsystem is merely 
>>> reusing the similar detach callback if it stores the link during reg().
>>
>> Sure!
>>
>>>
>>>>
>>>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>>>> ---
>>>>   kernel/bpf/bpf_struct_ops.c | 50 
>>>> ++++++++++++++++++++++++++++++++-----
>>>>   1 file changed, 44 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>>>> index 390f8c155135..bd2602982e4d 100644
>>>> --- a/kernel/bpf/bpf_struct_ops.c
>>>> +++ b/kernel/bpf/bpf_struct_ops.c
>>>> @@ -1057,9 +1057,6 @@ static void 
>>>> bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
>>>>       st_map = (struct bpf_struct_ops_map *)
>>>>           rcu_dereference_protected(st_link->map, true);
>>>>       if (st_map) {
>>>> -        /* st_link->map can be NULL if
>>>> -         * bpf_struct_ops_link_create() fails to register.
>>>> -         */
>>>>           st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, 
>>>> st_link);
>>>>           bpf_map_put(&st_map->map);
>>>>       }
>>>> @@ -1075,7 +1072,8 @@ static void 
>>>> bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
>>>>       st_link = container_of(link, struct bpf_struct_ops_link, link);
>>>>       rcu_read_lock();
>>>>       map = rcu_dereference(st_link->map);
>>>> -    seq_printf(seq, "map_id:\t%d\n", map->id);
>>>> +    if (map)
>>>> +        seq_printf(seq, "map_id:\t%d\n", map->id);
>>>>       rcu_read_unlock();
>>>>   }
>>>> @@ -1088,7 +1086,8 @@ static int 
>>>> bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
>>>>       st_link = container_of(link, struct bpf_struct_ops_link, link);
>>>>       rcu_read_lock();
>>>>       map = rcu_dereference(st_link->map);
>>>> -    info->struct_ops.map_id = map->id;
>>>> +    if (map)
>>>> +        info->struct_ops.map_id = map->id;
>>>>       rcu_read_unlock();
>>>>       return 0;
>>>>   }
>>>> @@ -1113,6 +1112,10 @@ static int 
>>>> bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>>>>       mutex_lock(&update_mutex);
>>>>       old_map = rcu_dereference_protected(st_link->map, 
>>>> lockdep_is_held(&update_mutex));
>>>> +    if (!old_map) {
>>>> +        err = -EINVAL;
>>>> +        goto err_out;
>>>> +    }
>>>>       if (expected_old_map && old_map != expected_old_map) {
>>>>           err = -EPERM;
>>>>           goto err_out;
>>>> @@ -1139,8 +1142,37 @@ static int 
>>>> bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
>>>>       return err;
>>>>   }
>>>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
>>>> +{
>>>> +    struct bpf_struct_ops_link *st_link = container_of(link, struct 
>>>> bpf_struct_ops_link, link);
>>>> +    struct bpf_struct_ops_map *st_map;
>>>> +    struct bpf_map *map;
>>>> +
>>>> +    mutex_lock(&update_mutex);
>>>> +
>>>> +    map = rcu_dereference_protected(st_link->map, true);
>>>
>>> nit. s/true/lockdep_is_held(&update_mutex)/
>>
>>
>> I thought it is protected by the refcount holding by the caller.
>> WDYT?
> 
> st_link->map is the one with __rcu tag and "!map" is tested next. I 
> don't see how these imply the map pointer is protected by refcount. Can 
> you explain?
> 
Ok! You are right. I confused links with maps.

>>
>>
>>>
>>>> +    if (!map) {
>>>> +        mutex_unlock(&update_mutex);
>>>> +        return -EINVAL;
>>>> +    }
>>>> +    st_map = container_of(map, struct bpf_struct_ops_map, map);
>>>> +
>>>> +    st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
>>>> +
>>>> +    rcu_assign_pointer(st_link->map, NULL);
>>>> +    /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
>>>> +     * bpf_map_inc() in bpf_struct_ops_map_link_update().
>>>> +     */
>>>> +    bpf_map_put(&st_map->map);
>>>> +
>>>> +    mutex_unlock(&update_mutex);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>>>>       .dealloc = bpf_struct_ops_map_link_dealloc,
>>>> +    .detach = bpf_struct_ops_map_link_detach,
>>>>       .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>>>>       .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>>>>       .update_map = bpf_struct_ops_map_link_update,
>>>> @@ -1176,13 +1208,19 @@ int bpf_struct_ops_link_create(union 
>>>> bpf_attr *attr)
>>>>       if (err)
>>>>           goto err_out;
>>>> +    /* Init link->map before calling reg() in case being detached
>>>> +     * immediately.
>>>> +     */
>>>> +    RCU_INIT_POINTER(link->map, map);
>>>> +
>>>>       err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, 
>>>> &link->link);
>>>>       if (err) {
>>>> +        rcu_assign_pointer(link->map, NULL);
>>>
>>> nit. RCU_INIT_POINTER(link->map, NULL) is fine.
>>
>> Got it!
>>
>>>
>>> There is a merge conflict with patch 4 also.
>>
>> What do you mean here? Do you mean the patch 4 can not be applied on top
>> of the patch 2?
> 
> Please monitor the bpf CI report.
> 
> bpf CI complains: 
> https://patchwork.kernel.org/project/netdevbpf/patch/20240507055600.2382627-2-thinker.li@gmail.com/
> 
> snippet of the error:
> 
> Applying: bpf: enable detaching links of struct_ops objects.
> Applying: bpf: support epoll from bpf struct_ops links.
> Applying: selftests/bpf: test struct_ops with epoll
> Patch failed at 0004 selftests/bpf: test struct_ops with epoll

Yes! I found it when I rebased my local repository.

> 
>>
>>>
>>> pw-bot: cr
>>>
>>>>           bpf_link_cleanup(&link_primer);
>>>> +        /* The link has been free by bpf_link_cleanup() */
>>>>           link = NULL;
>>>>           goto err_out;
>>>>       }
>>>> -    RCU_INIT_POINTER(link->map, map);
>>>>       return bpf_link_settle(&link_primer);
>>>
>

Patch

diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 390f8c155135..bd2602982e4d 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -1057,9 +1057,6 @@  static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
 	st_map = (struct bpf_struct_ops_map *)
 		rcu_dereference_protected(st_link->map, true);
 	if (st_map) {
-		/* st_link->map can be NULL if
-		 * bpf_struct_ops_link_create() fails to register.
-		 */
 		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, st_link);
 		bpf_map_put(&st_map->map);
 	}
@@ -1075,7 +1072,8 @@  static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
-	seq_printf(seq, "map_id:\t%d\n", map->id);
+	if (map)
+		seq_printf(seq, "map_id:\t%d\n", map->id);
 	rcu_read_unlock();
 }
 
@@ -1088,7 +1086,8 @@  static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
-	info->struct_ops.map_id = map->id;
+	if (map)
+		info->struct_ops.map_id = map->id;
 	rcu_read_unlock();
 	return 0;
 }
@@ -1113,6 +1112,10 @@  static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 	mutex_lock(&update_mutex);
 
 	old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+	if (!old_map) {
+		err = -EINVAL;
+		goto err_out;
+	}
 	if (expected_old_map && old_map != expected_old_map) {
 		err = -EPERM;
 		goto err_out;
@@ -1139,8 +1142,37 @@  static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 	return err;
 }
 
+static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
+{
+	struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
+	struct bpf_struct_ops_map *st_map;
+	struct bpf_map *map;
+
+	mutex_lock(&update_mutex);
+
+	map = rcu_dereference_protected(st_link->map, true);
+	if (!map) {
+		mutex_unlock(&update_mutex);
+		return -EINVAL;
+	}
+	st_map = container_of(map, struct bpf_struct_ops_map, map);
+
+	st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
+
+	rcu_assign_pointer(st_link->map, NULL);
+	/* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
+	 * bpf_map_inc() in bpf_struct_ops_map_link_update().
+	 */
+	bpf_map_put(&st_map->map);
+
+	mutex_unlock(&update_mutex);
+
+	return 0;
+}
+
 static const struct bpf_link_ops bpf_struct_ops_map_lops = {
 	.dealloc = bpf_struct_ops_map_link_dealloc,
+	.detach = bpf_struct_ops_map_link_detach,
 	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
 	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
 	.update_map = bpf_struct_ops_map_link_update,
@@ -1176,13 +1208,19 @@  int bpf_struct_ops_link_create(union bpf_attr *attr)
 	if (err)
 		goto err_out;
 
+	/* Init link->map before calling reg() in case the link is detached
+	 * immediately.
+	 */
+	RCU_INIT_POINTER(link->map, map);
+
 	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
 	if (err) {
+		rcu_assign_pointer(link->map, NULL);
 		bpf_link_cleanup(&link_primer);
+		/* The link has been freed by bpf_link_cleanup() */
 		link = NULL;
 		goto err_out;
 	}
-	RCU_INIT_POINTER(link->map, map);
 
 	return bpf_link_settle(&link_primer);