diff mbox series

[bpf-next,v1,1/8] bpf: Introduce pseudo_btf_id

Message ID 20200819224030.1615203-2-haoluo@google.com (mailing list archive)
State New
Headers show
Series bpf: BTF support for ksyms | expand

Commit Message

Hao Luo Aug. 19, 2020, 10:40 p.m. UTC
Pseudo_btf_id is a type of ld_imm insn that associates a btf_id to a
ksym so that further dereferences on the ksym can use the BTF info
to validate accesses. Internally, when seeing a pseudo_btf_id ld insn,
the verifier reads the btf_id stored in the insn[0]'s imm field and
marks the dst_reg as PTR_TO_BTF_ID. The btf_id points to a VAR_KIND,
which is encoded in btf_vminux by pahole. If the VAR is not of a struct
type, the dst reg will be marked as PTR_TO_MEM instead of PTR_TO_BTF_ID
and the mem_size is resolved to the size of the VAR's type.

From the VAR btf_id, the verifier can also read the address of the
ksym's corresponding kernel var from kallsyms and use that to fill
dst_reg.

Therefore, the proper functionality of pseudo_btf_id depends on (1)
kallsyms and (2) the encoding of kernel global VARs in pahole, which
should be available since pahole v1.18.

Signed-off-by: Hao Luo <haoluo@google.com>
---
 include/linux/btf.h      | 15 +++++++++
 include/uapi/linux/bpf.h | 38 ++++++++++++++++------
 kernel/bpf/btf.c         | 15 ---------
 kernel/bpf/verifier.c    | 68 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 112 insertions(+), 24 deletions(-)

Comments

Yonghong Song Aug. 20, 2020, 3:22 p.m. UTC | #1
On 8/19/20 3:40 PM, Hao Luo wrote:
> Pseudo_btf_id is a type of ld_imm insn that associates a btf_id to a
> ksym so that further dereferences on the ksym can use the BTF info
> to validate accesses. Internally, when seeing a pseudo_btf_id ld insn,
> the verifier reads the btf_id stored in the insn[0]'s imm field and
> marks the dst_reg as PTR_TO_BTF_ID. The btf_id points to a VAR_KIND,
> which is encoded in btf_vminux by pahole. If the VAR is not of a struct
> type, the dst reg will be marked as PTR_TO_MEM instead of PTR_TO_BTF_ID
> and the mem_size is resolved to the size of the VAR's type.
> 
>  From the VAR btf_id, the verifier can also read the address of the
> ksym's corresponding kernel var from kallsyms and use that to fill
> dst_reg.
> 
> Therefore, the proper functionality of pseudo_btf_id depends on (1)
> kallsyms and (2) the encoding of kernel global VARs in pahole, which
> should be available since pahole v1.18.

I tried your patch with latest pahole but it did not generate
expected BTF_TYPE_VARs. My pahole head is:
   f3d9054ba8ff btf_encoder: Teach pahole to store percpu variables in 
vmlinux BTF.

First I made the following changes to facilitate debugging:
diff --git a/btf_encoder.c b/btf_encoder.c
index 982f59d..f94c3a6 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -334,6 +334,9 @@ int cu__encode_btf(struct cu *cu, int verbose, bool 
force)
                 /* percpu variables are allocated in global space */
                 if (variable__scope(var) != VSCOPE_GLOBAL)
                         continue;
+               /* type 0 is void, probably an internal error */
+               if (var->ip.tag.type == 0)
+                       continue;
                 has_global_var = true;
                 head = &hash_addr[hashaddr__fn(var->ip.addr)];
                 hlist_add_head(&var->tool_hnode, head);
@@ -399,8 +402,8 @@ int cu__encode_btf(struct cu *cu, int verbose, bool 
force)
                 }

                 if (verbose)
-                       printf("symbol '%s' of address 0x%lx encoded\n",
-                              sym_name, addr);
+                       printf("symbol '%s' of address 0x%lx encoded, 
type %u\n",
+                              sym_name, addr, type);

                 /* add a BTF_KIND_VAR in btfe->types */
                 linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : 
BTF_VAR_STATIC;
diff --git a/libbtf.c b/libbtf.c
index 7a01ded..3a0d8d7 100644
--- a/libbtf.c
+++ b/libbtf.c
@@ -304,6 +304,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
         [BTF_KIND_RESTRICT]     = "RESTRICT",
         [BTF_KIND_FUNC]         = "FUNC",
         [BTF_KIND_FUNC_PROTO]   = "FUNC_PROTO",
+       [BTF_KIND_VAR]          = "VAR",
+       [BTF_KIND_DATASEC]      = "DATASEC",
  };

  static const char *btf_elf__name_in_gobuf(const struct btf_elf *btfe, 
uint32_t offset)
@@ -671,7 +673,7 @@ int32_t btf_elf__add_var_type(struct btf_elf *btfe, 
uint32_t type, uint32_t name
                 return -1;
         }

-       btf_elf__log_type(btfe, &t.type, false, false, "type=%u name=%s",
+       btf_elf__log_type(btfe, &t.type, false, false, "type=%u name=%s\n",
                           t.type.type, btf_elf__name_in_gobuf(btfe, 
t.type.name_off));

         return btfe->type_index;

It would be good if you can add some of the above changes to
pahole for easier `pahole -JV` dump.

With the above change, I only got static per cpu variables.
For example,
    static DEFINE_PER_CPU(unsigned int , mirred_rec_level);
in net/sched/act_mirred.c.

[10] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)
[74536] VAR 'mirred_rec_level' type_id=10, linkage=static

The dwarf debug_info entry for `mirred_rec_level`:
0x0001d8d6:   DW_TAG_variable
                 DW_AT_name      ("mirred_rec_level")
                 DW_AT_decl_file 
("/data/users/yhs/work/net-next/net/sched/act_mirred.c")
                 DW_AT_decl_line (31)
                 DW_AT_decl_column       (0x08)
                 DW_AT_type      (0x00000063 "unsigned int")
                 DW_AT_location  (DW_OP_addr 0x0)
It is not a declaration and it contains type.

All global per cpu variables do not have BTF_KIND_VAR generated.
I did a brief investigation and found this mostly like to be a
pahole issue. For example, for global per cpu variable
bpf_prog_active,
   include/linux/bpf.h
         DECLARE_PER_CPU(int , bpf_prog_active);
   kernel/bpf/syscall.c
         DEFINE_PER_CPU(int , bpf_prog_active);
it is declared in the header include/linux/bpf.h and
defined in kernel/bpf/syscall.c.

In many cu's, you will see:
0x0003592a:   DW_TAG_variable
                 DW_AT_name      ("bpf_prog_active")
                 DW_AT_decl_file 
("/data/users/yhs/work/net-next/include/linux/bpf.h")
                 DW_AT_decl_line (1074)
                 DW_AT_decl_column       (0x01)
                 DW_AT_type      (0x0001fa7e "int")
                 DW_AT_external  (true)
                 DW_AT_declaration       (true)

In kernel/bpf/syscall.c, I see
the following dwarf entry for real definition:
0x00013534:   DW_TAG_variable
                 DW_AT_name      ("bpf_prog_active")
                 DW_AT_decl_file 
("/data/users/yhs/work/net-next/include/linux/bpf.h")
                 DW_AT_decl_line (1074)
                 DW_AT_decl_column       (0x01)
                 DW_AT_type      (0x000000d6 "int")
                 DW_AT_external  (true)
                 DW_AT_declaration       (true)

0x00021a25:   DW_TAG_variable
                 DW_AT_specification     (0x00013534 "bpf_prog_active")
                 DW_AT_decl_file 
("/data/users/yhs/work/net-next/kernel/bpf/syscall.c")
                 DW_AT_decl_line (43)
                 DW_AT_location  (DW_OP_addr 0x0)

Note that for the second entry DW_AT_specification points to the 
declaration. I am not 100% sure whether pahole handle this properly or 
not. It generates a type id 0 (void) for bpf_prog_active variable.

Could you investigate this a little more?

I am using gcc 8.2.1. Using kernel default dwarf (dwarf 2) exposed
the above issue. Tries to use dwarf 4 and the problem still exists.


> 
> Signed-off-by: Hao Luo <haoluo@google.com>
> ---
>   include/linux/btf.h      | 15 +++++++++
>   include/uapi/linux/bpf.h | 38 ++++++++++++++++------
>   kernel/bpf/btf.c         | 15 ---------
>   kernel/bpf/verifier.c    | 68 ++++++++++++++++++++++++++++++++++++++++
>   4 files changed, 112 insertions(+), 24 deletions(-)
> 
[...]
Yonghong Song Aug. 20, 2020, 4:43 p.m. UTC | #2
On 8/19/20 3:40 PM, Hao Luo wrote:
> Pseudo_btf_id is a type of ld_imm insn that associates a btf_id to a
> ksym so that further dereferences on the ksym can use the BTF info
> to validate accesses. Internally, when seeing a pseudo_btf_id ld insn,
> the verifier reads the btf_id stored in the insn[0]'s imm field and
> marks the dst_reg as PTR_TO_BTF_ID. The btf_id points to a VAR_KIND,
> which is encoded in btf_vminux by pahole. If the VAR is not of a struct
> type, the dst reg will be marked as PTR_TO_MEM instead of PTR_TO_BTF_ID
> and the mem_size is resolved to the size of the VAR's type.
> 
>  From the VAR btf_id, the verifier can also read the address of the
> ksym's corresponding kernel var from kallsyms and use that to fill
> dst_reg.
> 
> Therefore, the proper functionality of pseudo_btf_id depends on (1)
> kallsyms and (2) the encoding of kernel global VARs in pahole, which
> should be available since pahole v1.18.
> 
> Signed-off-by: Hao Luo <haoluo@google.com>
> ---
>   include/linux/btf.h      | 15 +++++++++
>   include/uapi/linux/bpf.h | 38 ++++++++++++++++------
>   kernel/bpf/btf.c         | 15 ---------
>   kernel/bpf/verifier.c    | 68 ++++++++++++++++++++++++++++++++++++++++
>   4 files changed, 112 insertions(+), 24 deletions(-)
> 
> diff --git a/include/linux/btf.h b/include/linux/btf.h
> index 8b81fbb4497c..cee4089e83c0 100644
> --- a/include/linux/btf.h
> +++ b/include/linux/btf.h
> @@ -107,6 +107,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t)
>   	return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
>   }
>   
> +static inline bool btf_type_is_var(const struct btf_type *t)
> +{
> +	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
> +}
> +
> +/* union is only a special case of struct:
> + * all its offsetof(member) == 0
> + */
> +static inline bool btf_type_is_struct(const struct btf_type *t)
> +{
> +	u8 kind = BTF_INFO_KIND(t->info);
> +
> +	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
> +}
> +
>   static inline u16 btf_type_vlen(const struct btf_type *t)
>   {
>   	return BTF_INFO_VLEN(t->info);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 0480f893facd..468376f2910b 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -346,18 +346,38 @@ enum bpf_link_type {
>   #define BPF_F_TEST_STATE_FREQ	(1U << 3)
>   
>   /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
> - * two extensions:
> - *
> - * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
> - * insn[0].imm:      map fd              map fd
> - * insn[1].imm:      0                   offset into value
> - * insn[0].off:      0                   0
> - * insn[1].off:      0                   0
> - * ldimm64 rewrite:  address of map      address of map[0]+offset
> - * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
> + * the following extensions:
> + *
> + * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
> + * insn[0].imm:      map fd
> + * insn[1].imm:      0
> + * insn[0].off:      0
> + * insn[1].off:      0
> + * ldimm64 rewrite:  address of map
> + * verifier type:    CONST_PTR_TO_MAP
>    */
>   #define BPF_PSEUDO_MAP_FD	1
> +/*
> + * insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
> + * insn[0].imm:      map fd
> + * insn[1].imm:      offset into value
> + * insn[0].off:      0
> + * insn[1].off:      0
> + * ldimm64 rewrite:  address of map[0]+offset
> + * verifier type:    PTR_TO_MAP_VALUE
> + */
>   #define BPF_PSEUDO_MAP_VALUE	2
> +/*
> + * insn[0].src_reg:  BPF_PSEUDO_BTF_ID
> + * insn[0].imm:      kernel btd id of VAR
> + * insn[1].imm:      0
> + * insn[0].off:      0
> + * insn[1].off:      0
> + * ldimm64 rewrite:  address of the kernel variable
> + * verifier type:    PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
> + *                   is struct/union.
> + */
> +#define BPF_PSEUDO_BTF_ID	3
>   
>   /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
>    * offset to another bpf function
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 91afdd4c82e3..b6d8f653afe2 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -353,16 +353,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t)
>   	return !t || btf_type_nosize(t);
>   }
>   
> -/* union is only a special case of struct:
> - * all its offsetof(member) == 0
> - */
> -static bool btf_type_is_struct(const struct btf_type *t)
> -{
> -	u8 kind = BTF_INFO_KIND(t->info);
> -
> -	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
> -}
> -
>   static bool __btf_type_is_struct(const struct btf_type *t)
>   {
>   	return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
> @@ -373,11 +363,6 @@ static bool btf_type_is_array(const struct btf_type *t)
>   	return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
>   }
>   
> -static bool btf_type_is_var(const struct btf_type *t)
> -{
> -	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
> -}
> -
>   static bool btf_type_is_datasec(const struct btf_type *t)
>   {
>   	return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index ef938f17b944..47badde71f83 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -7205,6 +7205,68 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
>   	return 0;
>   }
>   
> +/* verify ld_imm64 insn of type PSEUDO_BTF_ID is valid */
> +static inline int check_pseudo_btf_id(struct bpf_verifier_env *env,
> +				      struct bpf_insn *insn)
> +{
> +	struct bpf_reg_state *regs = cur_regs(env);
> +	u32 type, id = insn->imm;
> +	u64 addr;
> +	const char *sym_name;
> +	const struct btf_type *t = btf_type_by_id(btf_vmlinux, id);

Since this is new code, please try to conform to reverse christmas tree 
coding style. For the last one, the assignment no need to be in
declaration, you can put "t = ..." right before the first use of "t".

same for other places.

> +
> +	if (!t) {
> +		verbose(env, "%s: invalid btf_id %d\n", __func__, id);
> +		return -ENOENT;
> +	}
> +
> +	if (insn[1].imm != 0) {
> +		verbose(env, "%s: BPF_PSEUDO_BTF_ID uses reserved fields\n",
> +			__func__);
> +		return -EINVAL;
> +	}
> +
> +	if (!btf_type_is_var(t)) {
> +		verbose(env, "%s: btf_id %d isn't KIND_VAR\n", __func__, id);
> +		return -EINVAL;
> +	}
> +
> +	sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
> +	addr = kallsyms_lookup_name(sym_name);
> +	if (!addr) {
> +		verbose(env, "%s: failed to find the address of symbol '%s'.\n",
> +			__func__, sym_name);
> +		return -ENOENT;
> +	}
> +
> +	insn[0].imm = (u32)addr;
> +	insn[1].imm = addr >> 32;
> +	mark_reg_known_zero(env, regs, insn->dst_reg);
> +
> +	type = t->type;
> +	t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
> +	if (!btf_type_is_struct(t)) {
> +		u32 tsize;
> +		const struct btf_type *ret;
> +		const char *tname; > +
> +		/* resolve the type size of ksym. */
> +		ret = btf_resolve_size(btf_vmlinux, t, &tsize, NULL, NULL);
> +		if (IS_ERR(ret)) {
> +			tname = btf_name_by_offset(btf_vmlinux, t->name_off);
> +			verbose(env, "unable to resolve the size of type '%s': %ld\n",
> +				tname, PTR_ERR(ret));
> +			return -EINVAL;
> +		}
> +		regs[insn->dst_reg].type = PTR_TO_MEM;
> +		regs[insn->dst_reg].mem_size = tsize;
> +	} else {
> +		regs[insn->dst_reg].type = PTR_TO_BTF_ID;
> +		regs[insn->dst_reg].btf_id = type;
> +	}
> +	return 0;
> +}
> +
>   /* verify BPF_LD_IMM64 instruction */
[...]
Hao Luo Aug. 20, 2020, 5:04 p.m. UTC | #3
Yonghong,

Thank you for taking a look. Explicitly cc'ing Arnaldo to see if he
has any immediate insights. In the meantime, I'll dedicate time to
investigate this issue you found.

Thanks,
Hao


On Thu, Aug 20, 2020 at 8:23 AM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 8/19/20 3:40 PM, Hao Luo wrote:
> > Pseudo_btf_id is a type of ld_imm insn that associates a btf_id to a
> > ksym so that further dereferences on the ksym can use the BTF info
> > to validate accesses. Internally, when seeing a pseudo_btf_id ld insn,
> > the verifier reads the btf_id stored in the insn[0]'s imm field and
> > marks the dst_reg as PTR_TO_BTF_ID. The btf_id points to a VAR_KIND,
> > which is encoded in btf_vminux by pahole. If the VAR is not of a struct
> > type, the dst reg will be marked as PTR_TO_MEM instead of PTR_TO_BTF_ID
> > and the mem_size is resolved to the size of the VAR's type.
> >
> >  From the VAR btf_id, the verifier can also read the address of the
> > ksym's corresponding kernel var from kallsyms and use that to fill
> > dst_reg.
> >
> > Therefore, the proper functionality of pseudo_btf_id depends on (1)
> > kallsyms and (2) the encoding of kernel global VARs in pahole, which
> > should be available since pahole v1.18.
>
> I tried your patch with latest pahole but it did not generate
> expected BTF_TYPE_VARs. My pahole head is:
>    f3d9054ba8ff btf_encoder: Teach pahole to store percpu variables in
> vmlinux BTF.
>
> First I made the following changes to facilitate debugging:
> diff --git a/btf_encoder.c b/btf_encoder.c
> index 982f59d..f94c3a6 100644
> --- a/btf_encoder.c
> +++ b/btf_encoder.c
> @@ -334,6 +334,9 @@ int cu__encode_btf(struct cu *cu, int verbose, bool
> force)
>                  /* percpu variables are allocated in global space */
>                  if (variable__scope(var) != VSCOPE_GLOBAL)
>                          continue;
> +               /* type 0 is void, probably an internal error */
> +               if (var->ip.tag.type == 0)
> +                       continue;
>                  has_global_var = true;
>                  head = &hash_addr[hashaddr__fn(var->ip.addr)];
>                  hlist_add_head(&var->tool_hnode, head);
> @@ -399,8 +402,8 @@ int cu__encode_btf(struct cu *cu, int verbose, bool
> force)
>                  }
>
>                  if (verbose)
> -                       printf("symbol '%s' of address 0x%lx encoded\n",
> -                              sym_name, addr);
> +                       printf("symbol '%s' of address 0x%lx encoded,
> type %u\n",
> +                              sym_name, addr, type);
>
>                  /* add a BTF_KIND_VAR in btfe->types */
>                  linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED :
> BTF_VAR_STATIC;
> diff --git a/libbtf.c b/libbtf.c
> index 7a01ded..3a0d8d7 100644
> --- a/libbtf.c
> +++ b/libbtf.c
> @@ -304,6 +304,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
>          [BTF_KIND_RESTRICT]     = "RESTRICT",
>          [BTF_KIND_FUNC]         = "FUNC",
>          [BTF_KIND_FUNC_PROTO]   = "FUNC_PROTO",
> +       [BTF_KIND_VAR]          = "VAR",
> +       [BTF_KIND_DATASEC]      = "DATASEC",
>   };
>
>   static const char *btf_elf__name_in_gobuf(const struct btf_elf *btfe,
> uint32_t offset)
> @@ -671,7 +673,7 @@ int32_t btf_elf__add_var_type(struct btf_elf *btfe,
> uint32_t type, uint32_t name
>                  return -1;
>          }
>
> -       btf_elf__log_type(btfe, &t.type, false, false, "type=%u name=%s",
> +       btf_elf__log_type(btfe, &t.type, false, false, "type=%u name=%s\n",
>                            t.type.type, btf_elf__name_in_gobuf(btfe,
> t.type.name_off));
>
>          return btfe->type_index;
>
> It would be good if you can add some of the above changes to
> pahole for easier `pahole -JV` dump.
>
> With the above change, I only got static per cpu variables.
> For example,
>     static DEFINE_PER_CPU(unsigned int , mirred_rec_level);
> in net/sched/act_mirred.c.
>
> [10] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)
> [74536] VAR 'mirred_rec_level' type_id=10, linkage=static
>
> The dwarf debug_info entry for `mirred_rec_level`:
> 0x0001d8d6:   DW_TAG_variable
>                  DW_AT_name      ("mirred_rec_level")
>                  DW_AT_decl_file
> ("/data/users/yhs/work/net-next/net/sched/act_mirred.c")
>                  DW_AT_decl_line (31)
>                  DW_AT_decl_column       (0x08)
>                  DW_AT_type      (0x00000063 "unsigned int")
>                  DW_AT_location  (DW_OP_addr 0x0)
> It is not a declaration and it contains type.
>
> All global per cpu variables do not have BTF_KIND_VAR generated.
> I did a brief investigation and found this mostly like to be a
> pahole issue. For example, for global per cpu variable
> bpf_prog_active,
>    include/linux/bpf.h
>          DECLARE_PER_CPU(int , bpf_prog_active);
>    kernel/bpf/syscall.c
>          DEFINE_PER_CPU(int , bpf_prog_active);
> it is declared in the header include/linux/bpf.h and
> defined in kernel/bpf/syscall.c.
>
> In many cu's, you will see:
> 0x0003592a:   DW_TAG_variable
>                  DW_AT_name      ("bpf_prog_active")
>                  DW_AT_decl_file
> ("/data/users/yhs/work/net-next/include/linux/bpf.h")
>                  DW_AT_decl_line (1074)
>                  DW_AT_decl_column       (0x01)
>                  DW_AT_type      (0x0001fa7e "int")
>                  DW_AT_external  (true)
>                  DW_AT_declaration       (true)
>
> In kernel/bpf/syscall.c, I see
> the following dwarf entry for real definition:
> 0x00013534:   DW_TAG_variable
>                  DW_AT_name      ("bpf_prog_active")
>                  DW_AT_decl_file
> ("/data/users/yhs/work/net-next/include/linux/bpf.h")
>                  DW_AT_decl_line (1074)
>                  DW_AT_decl_column       (0x01)
>                  DW_AT_type      (0x000000d6 "int")
>                  DW_AT_external  (true)
>                  DW_AT_declaration       (true)
>
> 0x00021a25:   DW_TAG_variable
>                  DW_AT_specification     (0x00013534 "bpf_prog_active")
>                  DW_AT_decl_file
> ("/data/users/yhs/work/net-next/kernel/bpf/syscall.c")
>                  DW_AT_decl_line (43)
>                  DW_AT_location  (DW_OP_addr 0x0)
>
> Note that for the second entry DW_AT_specification points to the
> declaration. I am not 100% sure whether pahole handle this properly or
> not. It generates a type id 0 (void) for bpf_prog_active variable.
>
> Could you investigate this a little more?
>
> I am using gcc 8.2.1. Using kernel default dwarf (dwarf 2) exposed
> the above issue. Tries to use dwarf 4 and the problem still exists.
>
>
> >
> > Signed-off-by: Hao Luo <haoluo@google.com>
> > ---
> >   include/linux/btf.h      | 15 +++++++++
> >   include/uapi/linux/bpf.h | 38 ++++++++++++++++------
> >   kernel/bpf/btf.c         | 15 ---------
> >   kernel/bpf/verifier.c    | 68 ++++++++++++++++++++++++++++++++++++++++
> >   4 files changed, 112 insertions(+), 24 deletions(-)
> >
> [...]
Alexei Starovoitov Aug. 20, 2020, 9:53 p.m. UTC | #4
On Wed, Aug 19, 2020 at 03:40:23PM -0700, Hao Luo wrote:
> +
>  /* verify BPF_LD_IMM64 instruction */
>  static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
>  {
> @@ -7234,6 +7296,9 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
>  		return 0;
>  	}
>  
> +	if (insn->src_reg == BPF_PSEUDO_BTF_ID)
> +		return check_pseudo_btf_id(env, insn);
> +
>  	map = env->used_maps[aux->map_index];
>  	mark_reg_known_zero(env, regs, insn->dst_reg);
>  	regs[insn->dst_reg].map_ptr = map;
> @@ -9255,6 +9320,9 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
>  				/* valid generic load 64-bit imm */
>  				goto next_insn;
>  
> +			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID)
> +				goto next_insn;
> +

Why did you choose to do it during main do_check() walk instead of this pre-pass ?
check_ld_imm() can be called multiple times for the same insn,
so it's faster and less surprising to do it during replace_map_fd_with_map_ptr().
BTF needs to be parsed first, of course.
You can either move check_btf_info() before replace_map_fd_with_map_ptr() or
move replace_map_fd_with_map_ptr() after check_btf_info().
The latter is probably cleaner.
Hao Luo Aug. 21, 2020, 2:22 a.m. UTC | #5
On Thu, Aug 20, 2020 at 2:53 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> Why did you choose to do it during main do_check() walk instead of this pre-pass ?
> check_ld_imm() can be called multiple times for the same insn,
> so it's faster and less surprising to do it during replace_map_fd_with_map_ptr().
> BTF needs to be parsed first, of course.
> You can either move check_btf_info() before replace_map_fd_with_map_ptr() or
> move replace_map_fd_with_map_ptr() after check_btf_info().
> The latter is probably cleaner.

Ah, that does make more sense. I can move
replace_map_fd_with_map_ptr() after check_btf_info() (or
check_attach_btf_id()) and rename it to better reflect the fact that
it's not just for maps now.

Thanks for the insights!
Hao
Hao Luo Aug. 25, 2020, 12:05 a.m. UTC | #6
Yonghong,

An update on this thread. I successfully reproduced this issue on a
8.2.0 gcc compiler, It looks like gcc 4.9 did not have this issue. I
was also using clang which did not show this bug.

It seems having a DW_AT_specification that refers to another
DW_TAG_variable isn't handled in pahole. I have a (maybe hacky) pahole
patch as fix and let me clean it up and post for review soon.

Hao
Yonghong Song Aug. 25, 2020, 12:43 a.m. UTC | #7
On 8/24/20 5:05 PM, Hao Luo wrote:
> Yonghong,
> 
> An update on this thread. I successfully reproduced this issue on a
> 8.2.0 gcc compiler, It looks like gcc 4.9 did not have this issue. I
> was also using clang which did not show this bug.
> 
> It seems having a DW_AT_specification that refers to another
> DW_TAG_variable isn't handled in pahole. I have a (maybe hacky) pahole
> patch as fix and let me clean it up and post for review soon.

Sounds good. Thanks for fixing it in pahole! People may use gcc 4.9 up 
to gcc 10, or llvm compiler (let us say llvm10 which is the version 
which has CO-RE support and is recommended for compiling bpf
programs.) Once you have fix for gcc 8.2, it might be worthwhile to
check a few other gcc/llvm version (at least gcc8/9/10 and llvm10)
to ensure it still works.

Currently, by default kernel is using DWARF2, pahole has a good
support for dwarf2. I am not sure whether it supports dwarf4 well
or not. But I think additional dwarf4 support, if needed, can
be done a little bit later as dwarf4 is not widely used yet for kernel,
I think.


> 
> Hao
>
diff mbox series

Patch

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 8b81fbb4497c..cee4089e83c0 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -107,6 +107,21 @@  static inline bool btf_type_is_func_proto(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
 }
 
+static inline bool btf_type_is_var(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
+}
+
+/* union is only a special case of struct:
+ * all its offsetof(member) == 0
+ */
+static inline bool btf_type_is_struct(const struct btf_type *t)
+{
+	u8 kind = BTF_INFO_KIND(t->info);
+
+	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
 static inline u16 btf_type_vlen(const struct btf_type *t)
 {
 	return BTF_INFO_VLEN(t->info);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0480f893facd..468376f2910b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -346,18 +346,38 @@  enum bpf_link_type {
 #define BPF_F_TEST_STATE_FREQ	(1U << 3)
 
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
- * two extensions:
- *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd              map fd
- * insn[1].imm:      0                   offset into value
- * insn[0].off:      0                   0
- * insn[1].off:      0                   0
- * ldimm64 rewrite:  address of map      address of map[0]+offset
- * verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+ * the following extensions:
+ *
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
+ * insn[0].imm:      map fd
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map
+ * verifier type:    CONST_PTR_TO_MAP
  */
 #define BPF_PSEUDO_MAP_FD	1
+/*
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm:      map fd
+ * insn[1].imm:      offset into value
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of map[0]+offset
+ * verifier type:    PTR_TO_MAP_VALUE
+ */
 #define BPF_PSEUDO_MAP_VALUE	2
+/*
+ * insn[0].src_reg:  BPF_PSEUDO_BTF_ID
+ * insn[0].imm:      kernel btd id of VAR
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the kernel variable
+ * verifier type:    PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var
+ *                   is struct/union.
+ */
+#define BPF_PSEUDO_BTF_ID	3
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 91afdd4c82e3..b6d8f653afe2 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -353,16 +353,6 @@  static bool btf_type_nosize_or_null(const struct btf_type *t)
 	return !t || btf_type_nosize(t);
 }
 
-/* union is only a special case of struct:
- * all its offsetof(member) == 0
- */
-static bool btf_type_is_struct(const struct btf_type *t)
-{
-	u8 kind = BTF_INFO_KIND(t->info);
-
-	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
-}
-
 static bool __btf_type_is_struct(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
@@ -373,11 +363,6 @@  static bool btf_type_is_array(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
 }
 
-static bool btf_type_is_var(const struct btf_type *t)
-{
-	return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
-}
-
 static bool btf_type_is_datasec(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ef938f17b944..47badde71f83 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7205,6 +7205,68 @@  static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	return 0;
 }
 
+/* verify ld_imm64 insn of type PSEUDO_BTF_ID is valid */
+static inline int check_pseudo_btf_id(struct bpf_verifier_env *env,
+				      struct bpf_insn *insn)
+{
+	struct bpf_reg_state *regs = cur_regs(env);
+	u32 type, id = insn->imm;
+	u64 addr;
+	const char *sym_name;
+	const struct btf_type *t = btf_type_by_id(btf_vmlinux, id);
+
+	if (!t) {
+		verbose(env, "%s: invalid btf_id %d\n", __func__, id);
+		return -ENOENT;
+	}
+
+	if (insn[1].imm != 0) {
+		verbose(env, "%s: BPF_PSEUDO_BTF_ID uses reserved fields\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (!btf_type_is_var(t)) {
+		verbose(env, "%s: btf_id %d isn't KIND_VAR\n", __func__, id);
+		return -EINVAL;
+	}
+
+	sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
+	addr = kallsyms_lookup_name(sym_name);
+	if (!addr) {
+		verbose(env, "%s: failed to find the address of symbol '%s'.\n",
+			__func__, sym_name);
+		return -ENOENT;
+	}
+
+	insn[0].imm = (u32)addr;
+	insn[1].imm = addr >> 32;
+	mark_reg_known_zero(env, regs, insn->dst_reg);
+
+	type = t->type;
+	t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
+	if (!btf_type_is_struct(t)) {
+		u32 tsize;
+		const struct btf_type *ret;
+		const char *tname;
+
+		/* resolve the type size of ksym. */
+		ret = btf_resolve_size(btf_vmlinux, t, &tsize, NULL, NULL);
+		if (IS_ERR(ret)) {
+			tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+			verbose(env, "unable to resolve the size of type '%s': %ld\n",
+				tname, PTR_ERR(ret));
+			return -EINVAL;
+		}
+		regs[insn->dst_reg].type = PTR_TO_MEM;
+		regs[insn->dst_reg].mem_size = tsize;
+	} else {
+		regs[insn->dst_reg].type = PTR_TO_BTF_ID;
+		regs[insn->dst_reg].btf_id = type;
+	}
+	return 0;
+}
+
 /* verify BPF_LD_IMM64 instruction */
 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
@@ -7234,6 +7296,9 @@  static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return 0;
 	}
 
+	if (insn->src_reg == BPF_PSEUDO_BTF_ID)
+		return check_pseudo_btf_id(env, insn);
+
 	map = env->used_maps[aux->map_index];
 	mark_reg_known_zero(env, regs, insn->dst_reg);
 	regs[insn->dst_reg].map_ptr = map;
@@ -9255,6 +9320,9 @@  static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 				/* valid generic load 64-bit imm */
 				goto next_insn;
 
+			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID)
+				goto next_insn;
+
 			/* In final convert_pseudo_ld_imm64() step, this is
 			 * converted into regular 64-bit imm load insn.
 			 */