diff mbox series

[bpf,1/2] bpf: fix a bpf_timer initialization issue

Message ID 20220211073908.3455653-1-yhs@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: fix a bpf_timer initialization issue | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf
netdev/apply fail Patch does not apply to bpf
bpf/vmtest-bpf-PR fail merge-conflict

Commit Message

Yonghong Song Feb. 11, 2022, 7:39 a.m. UTC
The patch in [1] intends to fix a bpf_timer related issue,
but the fix caused existing 'timer' selftest to fail with
hang or some random errors. After some debug, I found
an issue with check_and_init_map_value() in the hashtab.c.
More specifically, in hashtab.c, we have code
  l_new = bpf_map_kmalloc_node(&htab->map, ...)
  check_and_init_map_value(&htab->map, l_new...)
Note that bpf_map_kmalloc_node() does not do initialization
so l_new contains random value.

The function check_and_init_map_value() intends to zero the
bpf_spin_lock and bpf_timer if they exist in the map.
But I found bpf_spin_lock is zero'ed but bpf_timer is not zero'ed.
With [1], later copy_map_value() skips copying of
bpf_spin_lock and bpf_timer. The non-zero bpf_timer caused
random failures for 'timer' selftest.
Without [1], for both bpf_spin_lock and bpf_timer case,
bpf_timer will be zero'ed, so 'timer' self test is okay.

For check_and_init_map_value(), why bpf_spin_lock is zero'ed
properly while bpf_timer not. In bpf uapi header, we have
  struct bpf_spin_lock {
        __u32   val;
  };
  struct bpf_timer {
        __u64 :64;
        __u64 :64;
  } __attribute__((aligned(8)));

The initialization code:
  *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
      (struct bpf_spin_lock){};
  *(struct bpf_timer *)(dst + map->timer_off) =
      (struct bpf_timer){};
It appears the compiler has no obligation to initialize anonymous fields.
For example, let us use clang with bpf target as below:
  $ cat t.c
  struct bpf_timer {
        unsigned long long :64;
  };
  struct bpf_timer2 {
        unsigned long long a;
  };

  void test(struct bpf_timer *t) {
    *t = (struct bpf_timer){};
  }
  void test2(struct bpf_timer2 *t) {
    *t = (struct bpf_timer2){};
  }
  $ clang -target bpf -O2 -c -g t.c
  $ llvm-objdump -d t.o
   ...
   0000000000000000 <test>:
       0:       95 00 00 00 00 00 00 00 exit
   0000000000000008 <test2>:
       1:       b7 02 00 00 00 00 00 00 r2 = 0
       2:       7b 21 00 00 00 00 00 00 *(u64 *)(r1 + 0) = r2
       3:       95 00 00 00 00 00 00 00 exit

To fix the problem, let use memset for bpf_timer case in
check_and_init_map_value(). For consistency, memset is also
used for bpf_spin_lock case.

  [1] https://lore.kernel.org/bpf/20220209070324.1093182-2-memxor@gmail.com/

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

Comments

Yonghong Song Feb. 11, 2022, 7:41 a.m. UTC | #1
On 2/10/22 11:39 PM, Yonghong Song wrote:
> The patch in [1] intends to fix a bpf_timer related issue,
> but the fix caused existing 'timer' selftest to fail with
> hang or some random errors. After some debug, I found
> an issue with check_and_init_map_value() in the hashtab.c.
> More specifically, in hashtab.c, we have code
>    l_new = bpf_map_kmalloc_node(&htab->map, ...)
>    check_and_init_map_value(&htab->map, l_new...)
> Note that bpf_map_kmalloc_node() does not do initialization
> so l_new contains random value.
> 
> The function check_and_init_map_value() intends to zero the
> bpf_spin_lock and bpf_timer if they exist in the map.
> But I found bpf_spin_lock is zero'ed but bpf_timer is not zero'ed.
> With [1], later copy_map_value() skips copying of
> bpf_spin_lock and bpf_timer. The non-zero bpf_timer caused
> random failures for 'timer' selftest.
> Without [1], for both bpf_spin_lock and bpf_timer case,
> bpf_timer will be zero'ed, so 'timer' self test is okay.
> 
> For check_and_init_map_value(), why bpf_spin_lock is zero'ed
> properly while bpf_timer not. In bpf uapi header, we have
>    struct bpf_spin_lock {
>          __u32   val;
>    };
>    struct bpf_timer {
>          __u64 :64;
>          __u64 :64;
>    } __attribute__((aligned(8)));
> 
> The initialization code:
>    *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
>        (struct bpf_spin_lock){};
>    *(struct bpf_timer *)(dst + map->timer_off) =
>        (struct bpf_timer){};
> It appears the compiler has no obligation to initialize anonymous fields.
> For example, let us use clang with bpf target as below:
>    $ cat t.c
>    struct bpf_timer {
>          unsigned long long :64;
>    };
>    struct bpf_timer2 {
>          unsigned long long a;
>    };
> 
>    void test(struct bpf_timer *t) {
>      *t = (struct bpf_timer){};
>    }
>    void test2(struct bpf_timer2 *t) {
>      *t = (struct bpf_timer2){};
>    }
>    $ clang -target bpf -O2 -c -g t.c
>    $ llvm-objdump -d t.o
>     ...
>     0000000000000000 <test>:
>         0:       95 00 00 00 00 00 00 00 exit
>     0000000000000008 <test2>:
>         1:       b7 02 00 00 00 00 00 00 r2 = 0
>         2:       7b 21 00 00 00 00 00 00 *(u64 *)(r1 + 0) = r2
>         3:       95 00 00 00 00 00 00 00 exit
> 
> To fix the problem, let use memset for bpf_timer case in
> check_and_init_map_value(). For consistency, memset is also
> used for bpf_spin_lock case.
> 
>    [1] https://lore.kernel.org/bpf/20220209070324.1093182-2-memxor@gmail.com/
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>

Sorry, missed Fixes tag:
   Fixes: 68134668c17f3 ("bpf: Add map side support for bpf timers.")

> ---
>   include/linux/bpf.h | 6 ++----
>   1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index c1c554249698..f19abc59b6cd 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -220,11 +220,9 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
>   static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
>   {
>   	if (unlikely(map_value_has_spin_lock(map)))
> -		*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
> -			(struct bpf_spin_lock){};
> +		memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
>   	if (unlikely(map_value_has_timer(map)))
> -		*(struct bpf_timer *)(dst + map->timer_off) =
> -			(struct bpf_timer){};
> +		memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
>   }
>   
>   /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c1c554249698..f19abc59b6cd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -220,11 +220,9 @@  static inline bool map_value_has_timer(const struct bpf_map *map)
 static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
 {
 	if (unlikely(map_value_has_spin_lock(map)))
-		*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
-			(struct bpf_spin_lock){};
+		memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
 	if (unlikely(map_value_has_timer(map)))
-		*(struct bpf_timer *)(dst + map->timer_off) =
-			(struct bpf_timer){};
+		memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
 }
 
 /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */