
[bpf-next,15/19] bpf: Disable migration before calling ops->map_free()

Message ID: 20250106081900.1665573-16-houtao@huaweicloud.com (mailing list archive)
State: Changes Requested
Delegated to: BPF
Series [bpf-next,01/19] bpf: Remove migrate_{disable|enable} from LPM trie

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format fail Series does not have a cover letter; Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1 this patch: 1
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 13 of 13 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 65 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-19 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / veristat-meta

Commit Message

Hou Tao Jan. 6, 2025, 8:18 a.m. UTC
From: Hou Tao <houtao1@huawei.com>

Disable migration before calling ops->map_free() to simplify the
freeing of map values or special fields allocated from the bpf memory
allocator.

After disabling migration in bpf_map_free(), there is no need for
additional migrate_{disable|enable} pairs in the ->map_free()
callbacks. Remove these redundant invocations.

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 kernel/bpf/arraymap.c          | 2 --
 kernel/bpf/bpf_local_storage.c | 2 --
 kernel/bpf/hashtab.c           | 2 --
 kernel/bpf/range_tree.c        | 2 --
 kernel/bpf/syscall.c           | 8 +++++++-
 5 files changed, 7 insertions(+), 9 deletions(-)
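
In short, the series moves the migrate_{disable|enable} pair out of the
individual ->map_free() callbacks and into the generic free path. A
condensed before/after sketch (some_map_free() is a schematic stand-in for
the real callbacks; the actual hunks are in the patch below):

	/* Before: every callback that frees objects from the bpf memory
	 * allocator had to wrap the frees itself.
	 */
	static void some_map_free(struct bpf_map *map)
	{
		migrate_disable();
		bpf_mem_free(&bpf_global_ma, elem);	/* elem: a map element */
		migrate_enable();
	}

	/* After: bpf_map_free() disables migration once around the callback,
	 * so the per-callback pairs can be dropped.
	 */
	static void bpf_map_free(struct bpf_map *map)
	{
		migrate_disable();
		map->ops->map_free(map);
		migrate_enable();
	}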

Comments

Alexei Starovoitov Jan. 6, 2025, 10:24 p.m. UTC | #1
On Mon, Jan 6, 2025 at 12:07 AM Hou Tao <houtao@huaweicloud.com> wrote:
>
> From: Hou Tao <houtao1@huawei.com>
>
> Disable migration before calling ops->map_free() to simplify the
> freeing of map values or special fields allocated from the bpf memory
> allocator.
>
> After disabling migration in bpf_map_free(), there is no need for
> additional migrate_{disable|enable} pairs in the ->map_free()
> callbacks. Remove these redundant invocations.
>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>  kernel/bpf/arraymap.c          | 2 --
>  kernel/bpf/bpf_local_storage.c | 2 --
>  kernel/bpf/hashtab.c           | 2 --
>  kernel/bpf/range_tree.c        | 2 --
>  kernel/bpf/syscall.c           | 8 +++++++-
>  5 files changed, 7 insertions(+), 9 deletions(-)
>
> [... arraymap.c, bpf_local_storage.c, hashtab.c and range_tree.c hunks
> trimmed; see the full patch below ...]
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 0503ce1916b6..e7a41abe4809 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -835,8 +835,14 @@ static void bpf_map_free(struct bpf_map *map)
>         struct btf_record *rec = map->record;
>         struct btf *btf = map->btf;
>
> -       /* implementation dependent freeing */
> +       /* implementation dependent freeing. Disable migration to simplify
> +        * the freeing of values or special fields allocated from the bpf
> +        * memory allocator.
> +        */
> +       migrate_disable();
>         map->ops->map_free(map);
> +       migrate_enable();
> +

I was about to comment on patches 10-13 that it's
better to do it in bpf_map_free(), but then I got to this patch.
It all makes sense, but the patch breakdown is too fine-grained.
Patches 10-13 introduce migrate pairs only to be deleted
in patch 15. Please squash them into one patch.

Also you mention in the cover letter:

> Considering the bpf-next CI is broken

What is this about?

The cant_migrate() additions throughout look
a bit out of place. All that code doesn't care about migrations.
Only bpf_ma code does. Let's add it there instead?
The stack trace will tell us the caller anyway,
so no information is lost.
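
For illustration, that suggestion might look something like the following
hypothetical hunk (the exact function and placement inside the bpf_ma code
are assumptions, not part of this series):

	--- a/kernel/bpf/memalloc.c
	+++ b/kernel/bpf/memalloc.c
	@@ ... @@ void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr)
	+	/* bpf_ma relies on this_cpu_ptr(), so callers must have
	+	 * migration disabled; assert it once here instead of at
	+	 * every call site.
	+	 */
	+	cant_migrate();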

Overall it looks great.

pw-bot: cr
Hou Tao Jan. 7, 2025, 1:40 a.m. UTC | #2
Hi,

On 1/7/2025 6:24 AM, Alexei Starovoitov wrote:
> On Mon, Jan 6, 2025 at 12:07 AM Hou Tao <houtao@huaweicloud.com> wrote:
>> From: Hou Tao <houtao1@huawei.com>
>>
>> [... commit message and diff trimmed ...]
> I was about to comment on patches 10-13 that it's
> better to do it in bpf_map_free(), but then I got to this patch.
> It all makes sense, but the patch breakdown is too fine-grained.
> Patches 10-13 introduce migrate pairs only to be deleted
> in patch 15. Please squash them into one patch.

OK. However, let me argue for the fine-grained breakdown. The original
thought was that if disabling migration in the ->map_free() callback for
all maps introduced some problem, we could revert patch #15 separately,
instead of reverting the squashed patch and moving the
migrate_{disable|enable}() pairs back into the maps that are fine with
the change. What do you think?
>
> Also you mention in the cover letter:
>
>> Considering the bpf-next CI is broken
> What is this about?

Er, I said it wrong. It is my local bpf-next setup. A few days ago, when
I tried to verify the patches using the bpf-next/for-next tree, the
test_maps and test_progs runs failed. Will check today whether it is OK.
>
> The cant_migrate() additions throughout look
> a bit out of place. All that code doesn't care about migrations.
> Only bpf_ma code does. Let's add it there instead?
> The stack trace will tell us the caller anyway,
> so no information is lost.

OK. However, bpf_ma is not the only thing that needs migration disabled.
The reason that bpf_ma needs migrate_disable() is its use of
this_cpu_ptr(). However, there are many places in bpf which use
this_cpu_ptr() (e.g., bpf_for_each_array_elem) or a this_cpu_{inc|dec}
pair (e.g., bpf_cgrp_storage_lock). I will check which cant_migrate()
calls can be removed in v2.
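
Schematically, the hazard that migrate_disable() guards against (a generic
illustration, not code from the series):

	/* this_cpu_{inc|dec}() act on whichever CPU the task happens to be
	 * on at that instant; without migrate_disable() the task could
	 * migrate between the two calls and decrement a different CPU's
	 * counter. The same applies to any pointer from this_cpu_ptr().
	 */
	DEFINE_PER_CPU(int, busy_counter);

	static void storage_lock_unlock(void)
	{
		migrate_disable();
		this_cpu_inc(busy_counter);
		/* ... work that must see a consistent busy count ... */
		this_cpu_dec(busy_counter);
		migrate_enable();
	}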
>
> Overall it looks great.

Thanks for these suggestions.
>
> pw-bot: cr
Alexei Starovoitov Jan. 7, 2025, 2:24 a.m. UTC | #3
On Mon, Jan 6, 2025 at 5:40 PM Hou Tao <houtao@huaweicloud.com> wrote:
>
> Hi,
>
> On 1/7/2025 6:24 AM, Alexei Starovoitov wrote:
> > On Mon, Jan 6, 2025 at 12:07 AM Hou Tao <houtao@huaweicloud.com> wrote:
> >> From: Hou Tao <houtao1@huawei.com>
> >>
> >> [... commit message and diff trimmed ...]
> > I was about to comment on patches 10-13 that it's
> > better to do it in bpf_map_free(), but then I got to this patch.
> > It all makes sense, but the patch breakdown is too fine-grained.
> > Patches 10-13 introduce migrate pairs only to be deleted
> > in patch 15. Please squash them into one patch.
>
> OK. However, let me argue for the fine-grained breakdown. The original
> thought was that if disabling migration in the ->map_free() callback for
> all maps introduced some problem, we could revert patch #15 separately,
> instead of reverting the squashed patch and moving the
> migrate_{disable|enable}() pairs back into the maps that are fine with
> the change. What do you think?

Feels like overkill.
If disabling migration for the duration of the map_free callback causes
issues, we can introduce individual migrate pairs per map type
or revert the whole thing,
but imo it's all too theoretical at this point.

> >
> > Also you mention in the cover letter:
> >
> >> Considering the bpf-next CI is broken
> > What is this about?
>
> Er, I said it wrong. It is my local bpf-next setup. A few days ago, when
> I tried to verify the patches using the bpf-next/for-next tree, the
> test_maps and test_progs runs failed. Will check today whether it is OK.

I see. /for-next may be having issues. That needs to be investigated
separately.
Make sure /master is working well.

> >
> > The cant_migrate() additions throughout look
> > a bit out of place. All that code doesn't care about migrations.
> > Only bpf_ma code does. Let's add it there instead?
> > The stack trace will tell us the caller anyway,
> > so no information is lost.
>
> OK. However, bpf_ma is not the only thing that needs migration disabled.
> The reason that bpf_ma needs migrate_disable() is its use of
> this_cpu_ptr(). However, there are many places in bpf which use
> this_cpu_ptr() (e.g., bpf_for_each_array_elem) or a this_cpu_{inc|dec}
> pair (e.g., bpf_cgrp_storage_lock). I will check which cant_migrate()
> calls can be removed in v2.

Well, maybe not all cant_migrate() hunks across all patches.
But patches 16, 17, 18, 19 don't look like the right places
for cant_migrate().

Patch

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 451737493b17..eb28c0f219ee 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -455,7 +455,6 @@  static void array_map_free(struct bpf_map *map)
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	int i;
 
-	migrate_disable();
 	if (!IS_ERR_OR_NULL(map->record)) {
 		if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 			for (i = 0; i < array->map.max_entries; i++) {
@@ -472,7 +471,6 @@  static void array_map_free(struct bpf_map *map)
 				bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
 		}
 	}
-	migrate_enable();
 
 	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 		bpf_array_free_percpu(array);
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index b649cf736438..12cf6382175e 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -905,13 +905,11 @@  void bpf_local_storage_map_free(struct bpf_map *map,
 		while ((selem = hlist_entry_safe(
 				rcu_dereference_raw(hlist_first_rcu(&b->list)),
 				struct bpf_local_storage_elem, map_node))) {
-			migrate_disable();
 			if (busy_counter)
 				this_cpu_inc(*busy_counter);
 			bpf_selem_unlink(selem, true);
 			if (busy_counter)
 				this_cpu_dec(*busy_counter);
-			migrate_enable();
 			cond_resched_rcu();
 		}
 		rcu_read_unlock();
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 8bf1ad326e02..6051f8a39fec 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1570,14 +1570,12 @@  static void htab_map_free(struct bpf_map *map)
 	 * underneath and is responsible for waiting for callbacks to finish
 	 * during bpf_mem_alloc_destroy().
 	 */
-	migrate_disable();
 	if (!htab_is_prealloc(htab)) {
 		delete_all_elements(htab);
 	} else {
 		htab_free_prealloced_fields(htab);
 		prealloc_destroy(htab);
 	}
-	migrate_enable();
 
 	bpf_map_free_elem_count(map);
 	free_percpu(htab->extra_elems);
diff --git a/kernel/bpf/range_tree.c b/kernel/bpf/range_tree.c
index 5bdf9aadca3a..37b80a23ae1a 100644
--- a/kernel/bpf/range_tree.c
+++ b/kernel/bpf/range_tree.c
@@ -259,9 +259,7 @@  void range_tree_destroy(struct range_tree *rt)
 
 	while ((rn = range_it_iter_first(rt, 0, -1U))) {
 		range_it_remove(rn, rt);
-		migrate_disable();
 		bpf_mem_free(&bpf_global_ma, rn);
-		migrate_enable();
 	}
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0503ce1916b6..e7a41abe4809 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -835,8 +835,14 @@  static void bpf_map_free(struct bpf_map *map)
 	struct btf_record *rec = map->record;
 	struct btf *btf = map->btf;
 
-	/* implementation dependent freeing */
+	/* implementation dependent freeing. Disable migration to simplify
+	 * the freeing of values or special fields allocated from the bpf
+	 * memory allocator.
+	 */
+	migrate_disable();
 	map->ops->map_free(map);
+	migrate_enable();
+
 	/* Delay freeing of btf_record for maps, as map_free
 	 * callback usually needs access to them. It is better to do it here
 	 * than require each callback to do the free itself manually.