[bpf-next,12/16] bpf: Support basic operations for dynptr key in hash map

Message ID 20241008091501.8302-13-houtao@huaweicloud.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Series Support dynptr key for hash map

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 6 this patch: 6
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 13 of 13 maintainers
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 11 this patch: 11
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline fail Was 1 now: 3
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Hou Tao Oct. 8, 2024, 9:14 a.m. UTC
From: Hou Tao <houtao1@huawei.com>

The patch supports lookup, update, delete and lookup_and_delete
operations for hash maps with a dynptr key. There are two major
differences between the implementation of a normal hash map and a
dynptr-keyed hash map:

1) A dynptr-keyed hash map doesn't support pre-allocation.
The reason is that the data referenced by the dynptr in the map key is
allocated dynamically through the bpf memory allocator. The length
limit for these dynptrs is currently 4088 bytes. Because the dynptrs
are allocated dynamically, memory consumption will be lower than for a
normal hash map when the lengths of the dynptrs differ widely.

2) A freed element in a dynptr-keyed map will not be reused immediately.
For a normal hash map, a freed element may be reused immediately by a
newly-added element, so a lookup may return an incorrect result due to
element deletion and element reuse. A dynptr-keyed map cannot do that:
there are pointers (dynptrs) in the map key, and the update of such a
dynptr is not atomic, because both the address and the length of the
dynptr are updated. If the element were reused immediately, accessing
the dynptr in the freed element could incur an invalid memory access
due to a mismatch between the address and the size of the dynptr, so
the freed element is only reused after one RCU grace period.

Besides the differences above, a dynptr-keyed hash map also needs to
handle possibly-nullified dynptrs in the map key.

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 kernel/bpf/hashtab.c | 283 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 257 insertions(+), 26 deletions(-)
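
For orientation, here is a rough sketch of how a BPF program might declare and use such a dynptr-keyed hash map. The program-facing API (how the key struct embeds a struct bpf_dynptr and how the verifier treats it) is defined by other patches in this series, so the map name, key layout, sizes and helper usage below are illustrative assumptions rather than the definitive interface; only the constraints enforced by this patch (BPF_F_DYNPTR_IN_KEY, no pre-allocation, map_extra bounding each dynptr at no more than 4088 bytes) are taken from the code.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* Hypothetical key: a fixed-size id plus a variable-length name held
 * in a dynptr. The layout is an assumption for illustration only.
 */
struct name_key {
	__u32 id;
	struct bpf_dynptr name;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	/* dynptr-keyed maps must not be preallocated (see htab_map_alloc_check) */
	__uint(map_flags, BPF_F_DYNPTR_IN_KEY | BPF_F_NO_PREALLOC);
	__uint(map_extra, 512);		/* max dynptr length, <= 4088 */
	__uint(max_entries, 8192);
	__type(key, struct name_key);
	__type(value, __u64);
} name_hash SEC(".maps");

SEC("tp/syscalls/sys_enter_openat")
int lookup_by_name(void *ctx)
{
	char buf[16] = "example";
	struct name_key key = { .id = 1 };
	__u64 *val;

	/* Build a local dynptr over buf and embed it in the key. */
	if (bpf_dynptr_from_mem(buf, sizeof(buf), 0, &key.name))
		return 0;

	val = bpf_map_lookup_elem(&name_hash, &key);
	if (val)
		bpf_printk("found value %llu", *val);
	return 0;
}

char _license[] SEC("license") = "GPL";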

Comments

Alexei Starovoitov Oct. 11, 2024, 4:47 p.m. UTC | #1
On Tue, Oct 8, 2024 at 2:02 AM Hou Tao <houtao@huaweicloud.com> wrote:
>
> From: Hou Tao <houtao1@huawei.com>
>
> The patch supports lookup, update, delete and lookup_and_delete
> operations for hash maps with a dynptr key. There are two major
> differences between the implementation of a normal hash map and a
> dynptr-keyed hash map:
>
> 1) A dynptr-keyed hash map doesn't support pre-allocation.
> The reason is that the data referenced by the dynptr in the map key is
> allocated dynamically through the bpf memory allocator. The length
> limit for these dynptrs is currently 4088 bytes. Because the dynptrs
> are allocated dynamically, memory consumption will be lower than for a
> normal hash map when the lengths of the dynptrs differ widely.
>
> 2) A freed element in a dynptr-keyed map will not be reused immediately.
> For a normal hash map, a freed element may be reused immediately by a
> newly-added element, so a lookup may return an incorrect result due to
> element deletion and element reuse. A dynptr-keyed map cannot do that:
> there are pointers (dynptrs) in the map key, and the update of such a
> dynptr is not atomic, because both the address and the length of the
> dynptr are updated. If the element were reused immediately, accessing
> the dynptr in the freed element could incur an invalid memory access
> due to a mismatch between the address and the size of the dynptr, so
> the freed element is only reused after one RCU grace period.
>
> Besides the differences above, a dynptr-keyed hash map also needs to
> handle possibly-nullified dynptrs in the map key.
>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>  kernel/bpf/hashtab.c | 283 +++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 257 insertions(+), 26 deletions(-)
>
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index b14b87463ee0..edf19d36a413 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -88,6 +88,7 @@ struct bpf_htab {
>         struct bpf_map map;
>         struct bpf_mem_alloc ma;
>         struct bpf_mem_alloc pcpu_ma;
> +       struct bpf_mem_alloc dynptr_ma;
>         struct bucket *buckets;
>         void *elems;
>         union {
> @@ -425,6 +426,7 @@ static int htab_map_alloc_check(union bpf_attr *attr)
>         bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
>         bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
>         bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
> +       bool dynptr_in_key = (attr->map_flags & BPF_F_DYNPTR_IN_KEY);
>         int numa_node = bpf_map_attr_numa_node(attr);
>
>         BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
> @@ -438,6 +440,14 @@ static int htab_map_alloc_check(union bpf_attr *attr)
>             !bpf_map_flags_access_ok(attr->map_flags))
>                 return -EINVAL;
>
> +       if (dynptr_in_key) {
> +               if (percpu || lru || prealloc || !attr->map_extra)
> +                       return -EINVAL;
> +               if ((attr->map_extra >> 32) || bpf_dynptr_check_size(attr->map_extra) ||
> +                   bpf_mem_alloc_check_size(percpu, attr->map_extra))
> +                       return -E2BIG;
> +       }
> +
>         if (!lru && percpu_lru)
>                 return -EINVAL;
>
> @@ -482,6 +492,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
>          */
>         bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
>         bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
> +       bool dynptr_in_key = (attr->map_flags & BPF_F_DYNPTR_IN_KEY);
>         struct bpf_htab *htab;
>         int err, i;
>
> @@ -598,6 +609,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
>                         if (err)
>                                 goto free_map_locked;
>                 }
> +               if (dynptr_in_key) {
> +                       err = bpf_mem_alloc_init(&htab->dynptr_ma, 0, false);
> +                       if (err)
> +                               goto free_map_locked;
> +               }
>         }
>
>         return &htab->map;
> @@ -610,6 +626,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
>         for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
>                 free_percpu(htab->map_locked[i]);
>         bpf_map_area_free(htab->buckets);
> +       bpf_mem_alloc_destroy(&htab->dynptr_ma);
>         bpf_mem_alloc_destroy(&htab->pcpu_ma);
>         bpf_mem_alloc_destroy(&htab->ma);
>  free_elem_count:
> @@ -620,13 +637,55 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
>         return ERR_PTR(err);
>  }
>
> -static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
> +static inline u32 __htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
>  {
>         if (likely(key_len % 4 == 0))
>                 return jhash2(key, key_len / 4, hashrnd);
>         return jhash(key, key_len, hashrnd);
>  }
>
> +static u32 htab_map_dynptr_hash(const void *key, u32 key_len, u32 hashrnd,
> +                               const struct btf_record *rec)
> +{
> +       unsigned int i, cnt = rec->cnt;
> +       unsigned int hash = hashrnd;
> +       unsigned int offset = 0;
> +
> +       for (i = 0; i < cnt; i++) {
> +               const struct btf_field *field = &rec->fields[i];
> +               const struct bpf_dynptr_kern *kptr;
> +               unsigned int len;
> +
> +               if (field->type != BPF_DYNPTR)
> +                       continue;
> +
> +               /* non-dynptr part ? */
> +               if (offset < field->offset)
> +                       hash = jhash(key + offset, field->offset - offset, hash);
> +
> +               /* Skip nullified dynptr */
> +               kptr = key + field->offset;
> +               if (kptr->data) {
> +                       len = __bpf_dynptr_size(kptr);
> +                       hash = jhash(__bpf_dynptr_data(kptr, len), len, hash);
> +               }
> +               offset = field->offset + field->size;
> +       }
> +
> +       if (offset < key_len)
> +               hash = jhash(key + offset, key_len - offset, hash);
> +
> +       return hash;
> +}
> +
> +static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd,
> +                               const struct btf_record *rec)
> +{
> +       if (!rec)
> +               return __htab_map_hash(key, key_len, hashrnd);
> +       return htab_map_dynptr_hash(key, key_len, hashrnd, rec);
> +}
> +
>  static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
>  {
>         return &htab->buckets[hash & (htab->n_buckets - 1)];
> @@ -637,15 +696,68 @@ static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32
>         return &__select_bucket(htab, hash)->head;
>  }
>
> +static bool is_same_dynptr_key(const void *key, const void *tgt, unsigned int key_size,
> +                              const struct btf_record *rec)
> +{
> +       unsigned int i, cnt = rec->cnt;
> +       unsigned int offset = 0;
> +
> +       for (i = 0; i < cnt; i++) {
> +               const struct btf_field *field = &rec->fields[i];
> +               const struct bpf_dynptr_kern *kptr, *tgt_kptr;
> +               const void *data, *tgt_data;
> +               unsigned int len;
> +
> +               if (field->type != BPF_DYNPTR)
> +                       continue;
> +
> +               if (offset < field->offset &&
> +                   memcmp(key + offset, tgt + offset, field->offset - offset))
> +                       return false;
> +
> +               /*
> +                * For a nullified dynptr in the target key, __bpf_dynptr_size()
> +                * will return 0, and there will be no match for the target key.
> +                */
> +               kptr = key + field->offset;
> +               tgt_kptr = tgt + field->offset;
> +               len = __bpf_dynptr_size(kptr);
> +               if (len != __bpf_dynptr_size(tgt_kptr))
> +                       return false;
> +
> +               data = __bpf_dynptr_data(kptr, len);
> +               tgt_data = __bpf_dynptr_data(tgt_kptr, len);
> +               if (memcmp(data, tgt_data, len))
> +                       return false;
> +
> +               offset = field->offset + field->size;
> +       }
> +
> +       if (offset < key_size &&
> +           memcmp(key + offset, tgt + offset, key_size - offset))
> +               return false;
> +
> +       return true;
> +}
> +
> +static inline bool htab_is_same_key(const void *key, const void *tgt, unsigned int key_size,
> +                                   const struct btf_record *rec)
> +{
> +       if (!rec)
> +               return !memcmp(key, tgt, key_size);
> +       return is_same_dynptr_key(key, tgt, key_size, rec);
> +}
> +
>  /* this lookup function can only be called with bucket lock taken */
>  static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
> -                                        void *key, u32 key_size)
> +                                        void *key, u32 key_size,
> +                                        const struct btf_record *record)
>  {
>         struct hlist_nulls_node *n;
>         struct htab_elem *l;
>
>         hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
> -               if (l->hash == hash && !memcmp(&l->key, key, key_size))
> +               if (l->hash == hash && htab_is_same_key(l->key, key, key_size, record))
>                         return l;
>
>         return NULL;
> @@ -657,14 +769,15 @@ static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash
>   */
>  static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
>                                                u32 hash, void *key,
> -                                              u32 key_size, u32 n_buckets)
> +                                              u32 key_size, u32 n_buckets,
> +                                              const struct btf_record *record)
>  {
>         struct hlist_nulls_node *n;
>         struct htab_elem *l;
>
>  again:
>         hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
> -               if (l->hash == hash && !memcmp(&l->key, key, key_size))
> +               if (l->hash == hash && htab_is_same_key(l->key, key, key_size, record))
>                         return l;

If I'm reading this correctly, the support for dynptr in map keys
adds two map->key_record != NULL checks in the fast path,
hence what you said in the cover letter:

> It seems adding dynptr key support in the hash map degrades lookup
> performance by about 12% and update performance by about 7%. Will
> investigate these degradations first.
>
> a) lookup
> max_entries = 8K
>
> before:
> 0:hash_lookup 72347325 lookups per sec
>
> after:
> 0:hash_lookup 64758890 lookups per sec

is surprising.

Two conditional branches contribute to 12% performance loss?
Something fishy.
Try unlikely() to hopefully recover most of it.
After analyzing 'perf report/annotate', of course.

Worst case we can specialize htab_map_gen_lookup() to
call into different helpers.
Hou Tao Oct. 30, 2024, 10:02 a.m. UTC | #2
Hi,

On 10/12/2024 12:47 AM, Alexei Starovoitov wrote:
> On Tue, Oct 8, 2024 at 2:02 AM Hou Tao <houtao@huaweicloud.com> wrote:
>> From: Hou Tao <houtao1@huawei.com>
>>
>> The patch supports lookup, update, delete and lookup_and_delete
>> operations for hash maps with a dynptr key. There are two major
>> differences between the implementation of a normal hash map and a
>> dynptr-keyed hash map:
>>
>> 1) A dynptr-keyed hash map doesn't support pre-allocation.
>> The reason is that the data referenced by the dynptr in the map key is
>> allocated dynamically through the bpf memory allocator. The length
>> limit for these dynptrs is currently 4088 bytes. Because the dynptrs
>> are allocated dynamically, memory consumption will be lower than for a
>> normal hash map when the lengths of the dynptrs differ widely.
>>
>> 2) A freed element in a dynptr-keyed map will not be reused immediately.
>> For a normal hash map, a freed element may be reused immediately by a
>> newly-added element, so a lookup may return an incorrect result due to
>> element deletion and element reuse. A dynptr-keyed map cannot do that:
>> there are pointers (dynptrs) in the map key, and the update of such a
>> dynptr is not atomic, because both the address and the length of the
>> dynptr are updated. If the element were reused immediately, accessing
>> the dynptr in the freed element could incur an invalid memory access
>> due to a mismatch between the address and the size of the dynptr, so
>> the freed element is only reused after one RCU grace period.
>>
>> Besides the differences above, a dynptr-keyed hash map also needs to
>> handle possibly-nullified dynptrs in the map key.
>>
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>> ---
>>  kernel/bpf/hashtab.c | 283 +++++++++++++++++++++++++++++++++++++++----
>>  1 file changed, 257 insertions(+), 26 deletions(-)
>>
>> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
>> index b14b87463ee0..edf19d36a413 100644
>> --- a/kernel/bpf/hashtab.c
>> +++ b/kernel/bpf/hashtab.c
>> @@ -88,6 +88,7 @@ struct bpf_htab {
>>         struct bpf_map map;
>>         struct bpf_mem_alloc ma;
>>         struct bpf_mem_alloc pcpu_ma;
>> +       struct bpf_mem_alloc dynptr_ma;

SNIP
>> +
>> +static inline bool htab_is_same_key(const void *key, const void *tgt, unsigned int key_size,
>> +                                   const struct btf_record *rec)
>> +{
>> +       if (!rec)
>> +               return !memcmp(key, tgt, key_size);
>> +       return is_same_dynptr_key(key, tgt, key_size, rec);
>> +}
>> +
>>  /* this lookup function can only be called with bucket lock taken */
>>  static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
>> -                                        void *key, u32 key_size)
>> +                                        void *key, u32 key_size,
>> +                                        const struct btf_record *record)
>>  {
>>         struct hlist_nulls_node *n;
>>         struct htab_elem *l;
>>
>>         hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
>> -               if (l->hash == hash && !memcmp(&l->key, key, key_size))
>> +               if (l->hash == hash && htab_is_same_key(l->key, key, key_size, record))
>>                         return l;
>>
>>         return NULL;
>> @@ -657,14 +769,15 @@ static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash
>>   */
>>  static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
>>                                                u32 hash, void *key,
>> -                                              u32 key_size, u32 n_buckets)
>> +                                              u32 key_size, u32 n_buckets,
>> +                                              const struct btf_record *record)
>>  {
>>         struct hlist_nulls_node *n;
>>         struct htab_elem *l;
>>
>>  again:
>>         hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
>> -               if (l->hash == hash && !memcmp(&l->key, key, key_size))
>> +               if (l->hash == hash && htab_is_same_key(l->key, key, key_size, record))
>>                         return l;
> If I'm reading this correctly, the support for dynptr in map keys
> adds two map->key_record != NULL checks in the fast path,
> hence what you said in the cover letter:
>
>> It seems adding dynptr key support in the hash map degrades lookup
>> performance by about 12% and update performance by about 7%. Will
>> investigate these degradations first.
>>
>> a) lookup
>> max_entries = 8K
>>
>> before:
>> 0:hash_lookup 72347325 lookups per sec
>>
>> after:
>> 0:hash_lookup 64758890 lookups per sec
> is surprising.
>
> Two conditional branches contribute to 12% performance loss?
> Something fishy.
> Try unlikely() to hopefully recover most of it.
> After analyzing 'perf report/annotate', of course.

Using unlikely/likely doesn't help much. It seems the big performance
gap is due to lookup_nulls_elem_raw() no longer being inlined into
__htab_map_lookup_elem(). I still don't know why
lookup_nulls_elem_raw() is not inlined after the change. After marking
lookup_nulls_elem_raw() as inline, the performance gap is within ~2%
for htab map lookup. For htab_map_update/delete_elem(), the reason and
the result are similar. Should I mark these two functions
(lookup_nulls_elem_raw and lookup_elem_raw) as inline in the next
revision, or should I leave them as is and try to fix the degradation
in another patch set?

> Worst case we can specialize htab_map_gen_lookup() to
> call into different helpers.
> .
Alexei Starovoitov Nov. 2, 2024, 6:31 p.m. UTC | #3
On Wed, Oct 30, 2024 at 3:02 AM Hou Tao <houtao@huaweicloud.com> wrote:
>
> >> a) lookup
> >> max_entries = 8K
> >>
> >> before:
> >> 0:hash_lookup 72347325 lookups per sec
> >>
> >> after:
> >> 0:hash_lookup 64758890 lookups per sec
> > is surprising.
> >
> > Two conditional branches contribute to 12% performance loss?
> > Something fishy.
> > Try unlikely() to hopefully recover most of it.
> > After analyzing 'perf report/annotate', of course.
>
> Using unlikely/likely doesn't help much. It seems the big performance
> gap is due to the inline of lookup_nulls_elem_raw() in
> __htab_map_lookup_elem(). Still don't know the reason why
> lookup_nulls_elem_raw() is not inlined after the change. After marking
> the lookup_nulls_elem_raw() function as inline, the performance gap is
> within ~2% for htab map lookup.  For htab_map_update/delete_elem(),  the
> reason and the result is similar. Should I mark these two functions
> (lookup_nulls_elem_raw and lookup_elem_raw) as inline in the next
> revision, or should I leave it as is and try to fix the degradation in
> another patch set ?

from 12% to 2% by adding 'inline' to lookup_[nulls_]elem_raw()?
Certainly do it in the patch set.
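
For reference, here is a minimal sketch of the fix being agreed on, assuming that simply marking the two raw lookup helpers inline is what restores inlining into __htab_map_lookup_elem() and the update/delete paths. The body is copied verbatim from the patch; only the function attribute changes, shown here for lookup_elem_raw(), with lookup_nulls_elem_raw() treated the same way:

/* this lookup function can only be called with bucket lock taken */
static inline struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
						void *key, u32 key_size,
						const struct btf_record *record)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && htab_is_same_key(l->key, key, key_size, record))
			return l;

	return NULL;
}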

Patch

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b14b87463ee0..edf19d36a413 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -88,6 +88,7 @@  struct bpf_htab {
 	struct bpf_map map;
 	struct bpf_mem_alloc ma;
 	struct bpf_mem_alloc pcpu_ma;
+	struct bpf_mem_alloc dynptr_ma;
 	struct bucket *buckets;
 	void *elems;
 	union {
@@ -425,6 +426,7 @@  static int htab_map_alloc_check(union bpf_attr *attr)
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
+	bool dynptr_in_key = (attr->map_flags & BPF_F_DYNPTR_IN_KEY);
 	int numa_node = bpf_map_attr_numa_node(attr);
 
 	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
@@ -438,6 +440,14 @@  static int htab_map_alloc_check(union bpf_attr *attr)
 	    !bpf_map_flags_access_ok(attr->map_flags))
 		return -EINVAL;
 
+	if (dynptr_in_key) {
+		if (percpu || lru || prealloc || !attr->map_extra)
+			return -EINVAL;
+		if ((attr->map_extra >> 32) || bpf_dynptr_check_size(attr->map_extra) ||
+		    bpf_mem_alloc_check_size(percpu, attr->map_extra))
+			return -E2BIG;
+	}
+
 	if (!lru && percpu_lru)
 		return -EINVAL;
 
@@ -482,6 +492,7 @@  static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	 */
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+	bool dynptr_in_key = (attr->map_flags & BPF_F_DYNPTR_IN_KEY);
 	struct bpf_htab *htab;
 	int err, i;
 
@@ -598,6 +609,11 @@  static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 			if (err)
 				goto free_map_locked;
 		}
+		if (dynptr_in_key) {
+			err = bpf_mem_alloc_init(&htab->dynptr_ma, 0, false);
+			if (err)
+				goto free_map_locked;
+		}
 	}
 
 	return &htab->map;
@@ -610,6 +626,7 @@  static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
 		free_percpu(htab->map_locked[i]);
 	bpf_map_area_free(htab->buckets);
+	bpf_mem_alloc_destroy(&htab->dynptr_ma);
 	bpf_mem_alloc_destroy(&htab->pcpu_ma);
 	bpf_mem_alloc_destroy(&htab->ma);
 free_elem_count:
@@ -620,13 +637,55 @@  static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
-static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
+static inline u32 __htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
 {
 	if (likely(key_len % 4 == 0))
 		return jhash2(key, key_len / 4, hashrnd);
 	return jhash(key, key_len, hashrnd);
 }
 
+static u32 htab_map_dynptr_hash(const void *key, u32 key_len, u32 hashrnd,
+				const struct btf_record *rec)
+{
+	unsigned int i, cnt = rec->cnt;
+	unsigned int hash = hashrnd;
+	unsigned int offset = 0;
+
+	for (i = 0; i < cnt; i++) {
+		const struct btf_field *field = &rec->fields[i];
+		const struct bpf_dynptr_kern *kptr;
+		unsigned int len;
+
+		if (field->type != BPF_DYNPTR)
+			continue;
+
+		/* non-dynptr part ? */
+		if (offset < field->offset)
+			hash = jhash(key + offset, field->offset - offset, hash);
+
+		/* Skip nullified dynptr */
+		kptr = key + field->offset;
+		if (kptr->data) {
+			len = __bpf_dynptr_size(kptr);
+			hash = jhash(__bpf_dynptr_data(kptr, len), len, hash);
+		}
+		offset = field->offset + field->size;
+	}
+
+	if (offset < key_len)
+		hash = jhash(key + offset, key_len - offset, hash);
+
+	return hash;
+}
+
+static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd,
+				const struct btf_record *rec)
+{
+	if (!rec)
+		return __htab_map_hash(key, key_len, hashrnd);
+	return htab_map_dynptr_hash(key, key_len, hashrnd, rec);
+}
+
 static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
 {
 	return &htab->buckets[hash & (htab->n_buckets - 1)];
@@ -637,15 +696,68 @@  static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32
 	return &__select_bucket(htab, hash)->head;
 }
 
+static bool is_same_dynptr_key(const void *key, const void *tgt, unsigned int key_size,
+			       const struct btf_record *rec)
+{
+	unsigned int i, cnt = rec->cnt;
+	unsigned int offset = 0;
+
+	for (i = 0; i < cnt; i++) {
+		const struct btf_field *field = &rec->fields[i];
+		const struct bpf_dynptr_kern *kptr, *tgt_kptr;
+		const void *data, *tgt_data;
+		unsigned int len;
+
+		if (field->type != BPF_DYNPTR)
+			continue;
+
+		if (offset < field->offset &&
+		    memcmp(key + offset, tgt + offset, field->offset - offset))
+			return false;
+
+		/*
+		 * For a nullified dynptr in the target key, __bpf_dynptr_size()
+		 * will return 0, and there will be no match for the target key.
+		 */
+		kptr = key + field->offset;
+		tgt_kptr = tgt + field->offset;
+		len = __bpf_dynptr_size(kptr);
+		if (len != __bpf_dynptr_size(tgt_kptr))
+			return false;
+
+		data = __bpf_dynptr_data(kptr, len);
+		tgt_data = __bpf_dynptr_data(tgt_kptr, len);
+		if (memcmp(data, tgt_data, len))
+			return false;
+
+		offset = field->offset + field->size;
+	}
+
+	if (offset < key_size &&
+	    memcmp(key + offset, tgt + offset, key_size - offset))
+		return false;
+
+	return true;
+}
+
+static inline bool htab_is_same_key(const void *key, const void *tgt, unsigned int key_size,
+				    const struct btf_record *rec)
+{
+	if (!rec)
+		return !memcmp(key, tgt, key_size);
+	return is_same_dynptr_key(key, tgt, key_size, rec);
+}
+
 /* this lookup function can only be called with bucket lock taken */
 static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
-					 void *key, u32 key_size)
+					 void *key, u32 key_size,
+					 const struct btf_record *record)
 {
 	struct hlist_nulls_node *n;
 	struct htab_elem *l;
 
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
-		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+		if (l->hash == hash && htab_is_same_key(l->key, key, key_size, record))
 			return l;
 
 	return NULL;
@@ -657,14 +769,15 @@  static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash
  */
 static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
 					       u32 hash, void *key,
-					       u32 key_size, u32 n_buckets)
+					       u32 key_size, u32 n_buckets,
+					       const struct btf_record *record)
 {
 	struct hlist_nulls_node *n;
 	struct htab_elem *l;
 
 again:
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
-		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+		if (l->hash == hash && htab_is_same_key(l->key, key, key_size, record))
 			return l;
 
 	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
@@ -681,6 +794,7 @@  static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	const struct btf_record *record;
 	struct hlist_nulls_head *head;
 	struct htab_elem *l;
 	u32 hash, key_size;
@@ -689,12 +803,13 @@  static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 		     !rcu_read_lock_bh_held());
 
 	key_size = map->key_size;
+	record = map->key_record;
 
-	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	hash = htab_map_hash(key, key_size, htab->hashrnd, record);
 
 	head = select_bucket(htab, hash);
 
-	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
+	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets, record);
 
 	return l;
 }
@@ -784,6 +899,26 @@  static int htab_lru_map_gen_lookup(struct bpf_map *map,
 	return insn - insn_buf;
 }
 
+static void htab_free_dynptr_key(struct bpf_htab *htab, void *key)
+{
+	const struct btf_record *record = htab->map.key_record;
+	unsigned int i, cnt = record->cnt;
+
+	for (i = 0; i < cnt; i++) {
+		const struct btf_field *field = &record->fields[i];
+		struct bpf_dynptr_kern *kptr;
+
+		if (field->type != BPF_DYNPTR)
+			continue;
+
+		/* It may be accessed concurrently, so don't overwrite
+		 * the kptr.
+		 */
+		kptr = key + field->offset;
+		bpf_mem_free_rcu(&htab->dynptr_ma, kptr->data);
+	}
+}
+
 static void check_and_free_fields(struct bpf_htab *htab,
 				  struct htab_elem *elem)
 {
@@ -834,6 +969,68 @@  static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 	return l == tgt_l;
 }
 
+static int htab_copy_dynptr_key(struct bpf_htab *htab, void *dst_key, const void *key, u32 key_size)
+{
+	const struct btf_record *rec = htab->map.key_record;
+	struct bpf_dynptr_kern *dst_kptr;
+	const struct btf_field *field;
+	unsigned int i, cnt, offset;
+	int err;
+
+	offset = 0;
+	cnt = rec->cnt;
+	for (i = 0; i < cnt; i++) {
+		const struct bpf_dynptr_kern *kptr;
+		unsigned int len;
+		const void *data;
+		void *dst_data;
+
+		field = &rec->fields[i];
+		if (field->type != BPF_DYNPTR)
+			continue;
+
+		if (offset < field->offset)
+			memcpy(dst_key + offset, key + offset, field->offset - offset);
+
+		/* Doesn't support nullified dynptr in map key */
+		kptr = key + field->offset;
+		if (!kptr->data) {
+			err = -EINVAL;
+			goto out;
+		}
+		len = __bpf_dynptr_size(kptr);
+		data = __bpf_dynptr_data(kptr, len);
+
+		dst_data = bpf_mem_alloc(&htab->dynptr_ma, len);
+		if (!dst_data) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		memcpy(dst_data, data, len);
+		dst_kptr = dst_key + field->offset;
+		bpf_dynptr_init(dst_kptr, dst_data, BPF_DYNPTR_TYPE_LOCAL, 0, len);
+
+		offset = field->offset + field->size;
+	}
+
+	if (offset < key_size)
+		memcpy(dst_key + offset, key + offset, key_size - offset);
+
+	return 0;
+
+out:
+	for (; i > 0; i--) {
+		field = &rec->fields[i - 1];
+		if (field->type != BPF_DYNPTR)
+			continue;
+
+		dst_kptr = dst_key + field->offset;
+		bpf_mem_free(&htab->dynptr_ma, dst_kptr->data);
+	}
+	return err;
+}
+
 /* Called from syscall */
 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
@@ -850,12 +1047,12 @@  static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	if (!key)
 		goto find_first_elem;
 
-	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	hash = htab_map_hash(key, key_size, htab->hashrnd, NULL);
 
 	head = select_bucket(htab, hash);
 
 	/* lookup the key */
-	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
+	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets, NULL);
 
 	if (!l)
 		goto find_first_elem;
@@ -895,10 +1092,27 @@  static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 
 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 {
+	bool dynptr_in_key = bpf_map_has_dynptr_key(&htab->map);
+
+	if (dynptr_in_key)
+		htab_free_dynptr_key(htab, l->key);
+
 	check_and_free_fields(htab, l);
+
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
-	bpf_mem_cache_free(&htab->ma, l);
+
+	/*
+	 * For dynptr key, the update of dynptr in the key is not atomic:
+	 * both the pointer and the size are updated. If the element is reused
+	 * immediately, the access of the dynptr key during lookup procedure may
+	 * incur invalid memory access due to mismatch between the size and the
+	 * data pointer, so reuse the element after one RCU GP.
+	 */
+	if (dynptr_in_key)
+		bpf_mem_cache_free_rcu(&htab->ma, l);
+	else
+		bpf_mem_cache_free(&htab->ma, l);
 }
 
 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
@@ -1046,7 +1260,19 @@  static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 		}
 	}
 
-	memcpy(l_new->key, key, key_size);
+	if (bpf_map_has_dynptr_key(&htab->map)) {
+		int copy_err;
+
+		copy_err = htab_copy_dynptr_key(htab, l_new->key, key, key_size);
+		if (copy_err) {
+			bpf_mem_cache_free(&htab->ma, l_new);
+			l_new = ERR_PTR(copy_err);
+			goto dec_count;
+		}
+	} else {
+		memcpy(l_new->key, key, key_size);
+	}
+
 	if (percpu) {
 		if (prealloc) {
 			pptr = htab_elem_get_ptr(l_new, key_size);
@@ -1102,6 +1328,7 @@  static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 				 u64 map_flags)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	const struct btf_record *key_record = map->key_record;
 	struct htab_elem *l_new = NULL, *l_old;
 	struct hlist_nulls_head *head;
 	unsigned long flags;
@@ -1118,7 +1345,7 @@  static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 
 	key_size = map->key_size;
 
-	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	hash = htab_map_hash(key, key_size, htab->hashrnd, key_record);
 
 	b = __select_bucket(htab, hash);
 	head = &b->head;
@@ -1128,7 +1355,7 @@  static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 			return -EINVAL;
 		/* find an element without taking the bucket lock */
 		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
-					      htab->n_buckets);
+					      htab->n_buckets, key_record);
 		ret = check_flags(htab, l_old, map_flags);
 		if (ret)
 			return ret;
@@ -1149,7 +1376,7 @@  static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		return ret;
 
-	l_old = lookup_elem_raw(head, hash, key, key_size);
+	l_old = lookup_elem_raw(head, hash, key, key_size, key_record);
 
 	ret = check_flags(htab, l_old, map_flags);
 	if (ret)
@@ -1221,7 +1448,7 @@  static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
 
 	key_size = map->key_size;
 
-	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	hash = __htab_map_hash(key, key_size, htab->hashrnd);
 
 	b = __select_bucket(htab, hash);
 	head = &b->head;
@@ -1241,7 +1468,7 @@  static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
 	if (ret)
 		goto err_lock_bucket;
 
-	l_old = lookup_elem_raw(head, hash, key, key_size);
+	l_old = lookup_elem_raw(head, hash, key, key_size, NULL);
 
 	ret = check_flags(htab, l_old, map_flags);
 	if (ret)
@@ -1290,7 +1517,7 @@  static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 
 	key_size = map->key_size;
 
-	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	hash = __htab_map_hash(key, key_size, htab->hashrnd);
 
 	b = __select_bucket(htab, hash);
 	head = &b->head;
@@ -1299,7 +1526,7 @@  static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	if (ret)
 		return ret;
 
-	l_old = lookup_elem_raw(head, hash, key, key_size);
+	l_old = lookup_elem_raw(head, hash, key, key_size, NULL);
 
 	ret = check_flags(htab, l_old, map_flags);
 	if (ret)
@@ -1345,7 +1572,7 @@  static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 
 	key_size = map->key_size;
 
-	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	hash = htab_map_hash(key, key_size, htab->hashrnd, NULL);
 
 	b = __select_bucket(htab, hash);
 	head = &b->head;
@@ -1365,7 +1592,7 @@  static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	if (ret)
 		goto err_lock_bucket;
 
-	l_old = lookup_elem_raw(head, hash, key, key_size);
+	l_old = lookup_elem_raw(head, hash, key, key_size, NULL);
 
 	ret = check_flags(htab, l_old, map_flags);
 	if (ret)
@@ -1411,6 +1638,7 @@  static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 static long htab_map_delete_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	const struct btf_record *key_record = map->key_record;
 	struct hlist_nulls_head *head;
 	struct bucket *b;
 	struct htab_elem *l;
@@ -1423,7 +1651,7 @@  static long htab_map_delete_elem(struct bpf_map *map, void *key)
 
 	key_size = map->key_size;
 
-	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	hash = htab_map_hash(key, key_size, htab->hashrnd, key_record);
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
@@ -1431,7 +1659,7 @@  static long htab_map_delete_elem(struct bpf_map *map, void *key)
 	if (ret)
 		return ret;
 
-	l = lookup_elem_raw(head, hash, key, key_size);
+	l = lookup_elem_raw(head, hash, key, key_size, key_record);
 
 	if (l) {
 		hlist_nulls_del_rcu(&l->hash_node);
@@ -1459,7 +1687,7 @@  static long htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 
 	key_size = map->key_size;
 
-	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	hash = __htab_map_hash(key, key_size, htab->hashrnd);
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
@@ -1467,7 +1695,7 @@  static long htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	if (ret)
 		return ret;
 
-	l = lookup_elem_raw(head, hash, key, key_size);
+	l = lookup_elem_raw(head, hash, key, key_size, NULL);
 
 	if (l)
 		hlist_nulls_del_rcu(&l->hash_node);
@@ -1564,6 +1792,7 @@  static void htab_map_free(struct bpf_map *map)
 	bpf_map_free_elem_count(map);
 	free_percpu(htab->extra_elems);
 	bpf_map_area_free(htab->buckets);
+	bpf_mem_alloc_destroy(&htab->dynptr_ma);
 	bpf_mem_alloc_destroy(&htab->pcpu_ma);
 	bpf_mem_alloc_destroy(&htab->ma);
 	if (htab->use_percpu_counter)
@@ -1600,6 +1829,7 @@  static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
 					     bool is_percpu, u64 flags)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	const struct btf_record *key_record;
 	struct hlist_nulls_head *head;
 	unsigned long bflags;
 	struct htab_elem *l;
@@ -1608,8 +1838,9 @@  static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
 	int ret;
 
 	key_size = map->key_size;
+	key_record = map->key_record;
 
-	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	hash = htab_map_hash(key, key_size, htab->hashrnd, key_record);
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
@@ -1617,7 +1848,7 @@  static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
 	if (ret)
 		return ret;
 
-	l = lookup_elem_raw(head, hash, key, key_size);
+	l = lookup_elem_raw(head, hash, key, key_size, key_record);
 	if (!l) {
 		ret = -ENOENT;
 	} else {