diff mbox series

[bpf-next,v1,4/8] bpf/libbpf: BTF support for typed ksyms

Message ID 20200819224030.1615203-5-haoluo@google.com
State New
Headers show
Series bpf: BTF support for ksyms | expand

Commit Message

Hao Luo Aug. 19, 2020, 10:40 p.m. UTC
If a ksym is defined with a type, libbpf will try to find the ksym's btf
information from kernel btf. If a valid btf entry for the ksym is found,
libbpf can pass in the found btf id to the verifier, which validates the
ksym's type and value.

Typeless ksyms (i.e. those defined as 'void') will not have such btf_id,
but it has the symbol's address (read from kallsyms) and its value is
treated as a raw pointer.

Signed-off-by: Hao Luo <haoluo@google.com>
---
 tools/lib/bpf/libbpf.c | 130 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 114 insertions(+), 16 deletions(-)

Comments

Andrii Nakryiko Aug. 21, 2020, 10:37 p.m. UTC | #1
On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
>
> If a ksym is defined with a type, libbpf will try to find the ksym's btf
> information from kernel btf. If a valid btf entry for the ksym is found,
> libbpf can pass in the found btf id to the verifier, which validates the
> ksym's type and value.
>
> Typeless ksyms (i.e. those defined as 'void') will not have such btf_id,
> but it has the symbol's address (read from kallsyms) and its value is
> treated as a raw pointer.
>
> Signed-off-by: Hao Luo <haoluo@google.com>
> ---
>  tools/lib/bpf/libbpf.c | 130 ++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 114 insertions(+), 16 deletions(-)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 4a81c6b2d21b..94eff612c7c2 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -357,7 +357,16 @@ struct extern_desc {
>                         bool is_signed;
>                 } kcfg;
>                 struct {
> -                       unsigned long long addr;
> +                       /*
> +                        *  1. If ksym is typeless, the field 'addr' is valid.
> +                        *  2. If ksym is typed, the field 'vmlinux_btf_id' is
> +                        *     valid.
> +                        */
> +                       bool is_typeless;
> +                       union {
> +                               unsigned long long addr;
> +                               int vmlinux_btf_id;
> +                       };

ksym is 16 bytes anyways, union doesn't help to save space. I propose
to encode all this with just two fields: vmlinux_btf_id and addr. If
btf_id == 0, then extern is typeless.

>                 } ksym;
>         };
>  };
> @@ -382,6 +391,7 @@ struct bpf_object {
>
>         bool loaded;
>         bool has_pseudo_calls;
> +       bool has_typed_ksyms;
>
>         /*
>          * Information when doing elf related work. Only valid if fd
> @@ -2521,6 +2531,10 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
>         if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
>                 need_vmlinux_btf = true;
>
> +       /* Support for typed ksyms needs kernel BTF */
> +       if (obj->has_typed_ksyms)
> +               need_vmlinux_btf = true;

On the second read, I don't think you really need `has_typed_ksyms` at
all. Just iterate over ksym externs and see if you have a typed one.
It's the only place that cares.

> +
>         bpf_object__for_each_program(prog, obj) {
>                 if (!prog->load)
>                         continue;
> @@ -2975,10 +2989,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
>                         ext->type = EXT_KSYM;
>
>                         vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
> -                       if (!btf_is_void(vt)) {
> -                               pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
> -                               return -ENOTSUP;
> -                       }
> +                       ext->ksym.is_typeless = btf_is_void(vt);
> +
> +                       if (!obj->has_typed_ksyms && !ext->ksym.is_typeless)
> +                               obj->has_typed_ksyms = true;

nit: keep it simple:

if (ext->ksym.is_typeless)
  obj->has_typed_ksyms = true;

>                 } else {
>                         pr_warn("unrecognized extern section '%s'\n", sec_name);
>                         return -ENOTSUP;
> @@ -2992,9 +3006,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
>         /* sort externs by type, for kcfg ones also by (align, size, name) */
>         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
>
> -       /* for .ksyms section, we need to turn all externs into allocated
> -        * variables in BTF to pass kernel verification; we do this by
> -        * pretending that each extern is a 8-byte variable
> +       /* for .ksyms section, we need to turn all typeless externs into
> +        * allocated variables in BTF to pass kernel verification; we do
> +        * this by pretending that each typeless extern is a 8-byte variable
>          */
>         if (ksym_sec) {
>                 /* find existing 4-byte integer type in BTF to use for fake
> @@ -3012,7 +3026,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
>
>                 sec = ksym_sec;
>                 n = btf_vlen(sec);
> -               for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
> +               for (i = 0, off = 0; i < n; i++) {
>                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
>                         struct btf_type *vt;
>
> @@ -3025,9 +3039,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
>                                 return -ESRCH;
>                         }
>                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
> -                       vt->type = int_btf_id;
> +                       if (ext->ksym.is_typeless) {
> +                               vt->type = int_btf_id;
> +                               vs->size = sizeof(int);
> +                       }
>                         vs->offset = off;
> -                       vs->size = sizeof(int);
> +                       off += vs->size;
> +                       pr_debug("ksym var_secinfo: var '%s', type #%d, size %d, offset %d\n",
> +                                ext->name, vt->type, vs->size, vs->offset);

It's a bit of a waste that we still allocate memory for those typed
ksym externs, as they don't really need space. But modifying BTF is a
pain right now, so I think we'll have to do it, until we have a better
BTF API. But let's make them integers for now to take a fixed and
small amount of space.

>                 }
>                 sec->size = off;
>         }
> @@ -5300,8 +5319,13 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
>                                 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
>                                 insn[1].imm = ext->kcfg.data_off;
>                         } else /* EXT_KSYM */ {
> -                               insn[0].imm = (__u32)ext->ksym.addr;
> -                               insn[1].imm = ext->ksym.addr >> 32;
> +                               if (ext->ksym.is_typeless) { /* typelss ksyms */

typo: typeless

> +                                       insn[0].imm = (__u32)ext->ksym.addr;
> +                                       insn[1].imm = ext->ksym.addr >> 32;
> +                               } else { /* typed ksyms */
> +                                       insn[0].src_reg = BPF_PSEUDO_BTF_ID;
> +                                       insn[0].imm = ext->ksym.vmlinux_btf_id;
> +                               }
>                         }
>                         break;
>                 case RELO_CALL:
> @@ -6019,6 +6043,10 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
>                 if (!ext || ext->type != EXT_KSYM)
>                         continue;
>
> +               /* Typed ksyms have the verifier to fill their addresses. */
> +               if (!ext->ksym.is_typeless)
> +                       continue;

It might still be a good idea to try to find the symbol in kallsyms
and emit nicer message if it's not there. Think about the user's
experience when some global variable is removed from the kernel (or
compiled out due to missing Kconfig). If libbpf can easily detect
this, we should do it and provide a good error message.

> +
>                 if (ext->is_set && ext->ksym.addr != sym_addr) {
>                         pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
>                                 sym_name, ext->ksym.addr, sym_addr);
> @@ -6037,10 +6065,72 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
>         return err;
>  }
>
> +static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
> +{
> +       struct extern_desc *ext;
> +       int i, id;
> +
> +       if (!obj->btf_vmlinux) {
> +               pr_warn("support of typed ksyms needs kernel btf.\n");
> +               return -ENOENT;
> +       }
> +
> +       for (i = 0; i < obj->nr_extern; i++) {
> +               const struct btf_type *v, *vx; /* VARs in object and vmlinux btf */
> +               const struct btf_type *t, *tx; /* TYPEs in btf */
> +               __u32 vt, vtx; /* btf_ids of TYPEs */

I use targ_ and local_ prefixes with CO-RE to distinguish something
that's coming from BPF object's BTF vs kernel's BTF, can you please
adopt the same, as the process is essentially the same. "vx" vs "vtx"
vs "v" are hard to distinguish and understand.

> +
> +               ext = &obj->externs[i];
> +               if (ext->type != EXT_KSYM)
> +                       continue;
> +
> +               if (ext->ksym.is_typeless)
> +                       continue;

nit: combine into a single filter condition (we need typed ksym,
that's one check)

> +
> +               if (ext->is_set) {
> +                       pr_warn("typed ksym '%s' resolved as typeless ksyms.\n",
> +                               ext->name);
> +                       return -EFAULT;

this is a bug if that happens, that should be caught by test, please
remove unnecessary check

> +               }
> +
> +               id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
> +                                           BTF_KIND_VAR);
> +               if (id <= 0) {
> +                       pr_warn("no btf entry for ksym '%s' in vmlinux.\n",
> +                               ext->name);
> +                       return -ESRCH;
> +               }
> +
> +               vx = btf__type_by_id(obj->btf_vmlinux, id);
> +               tx = skip_mods_and_typedefs(obj->btf_vmlinux, vx->type, &vtx);
> +
> +               v = btf__type_by_id(obj->btf, ext->btf_id);
> +               t = skip_mods_and_typedefs(obj->btf, v->type, &vt);
> +
> +               if (!btf_ksym_type_match(obj->btf_vmlinux, vtx, obj->btf, vt)) {
> +                       const char *tname, *txname; /* names of TYPEs */
> +
> +                       txname = btf__name_by_offset(obj->btf_vmlinux, tx->name_off);
> +                       tname = btf__name_by_offset(obj->btf, t->name_off);
> +
> +                       pr_warn("ksym '%s' expects type '%s' (vmlinux_btf_id: #%d), "
> +                               "but got '%s' (btf_id: #%d)\n", ext->name,
> +                               txname, vtx, tname, vt);
> +                       return -EINVAL;
> +               }

yeah, definitely just use bpf_core_types_are_compat() here. You'll
want to skip_mods_and_typedefs first, but everything else should work
for your use case.

> +
> +               ext->is_set = true;
> +               ext->ksym.vmlinux_btf_id = id;
> +               pr_debug("extern (ksym) %s=vmlinux_btf_id(#%d)\n", ext->name, id);
> +       }
> +       return 0;
> +}
> +

[...]
Hao Luo Aug. 27, 2020, 10:29 p.m. UTC | #2
On Fri, Aug 21, 2020 at 3:37 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
> >
> > If a ksym is defined with a type, libbpf will try to find the ksym's btf
> > information from kernel btf. If a valid btf entry for the ksym is found,
> > libbpf can pass in the found btf id to the verifier, which validates the
> > ksym's type and value.
> >
> > Typeless ksyms (i.e. those defined as 'void') will not have such btf_id,
> > but it has the symbol's address (read from kallsyms) and its value is
> > treated as a raw pointer.
> >
> > Signed-off-by: Hao Luo <haoluo@google.com>
> > ---
> >  tools/lib/bpf/libbpf.c | 130 ++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 114 insertions(+), 16 deletions(-)
> >
> > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > index 4a81c6b2d21b..94eff612c7c2 100644
> > --- a/tools/lib/bpf/libbpf.c
> > +++ b/tools/lib/bpf/libbpf.c
> > @@ -357,7 +357,16 @@ struct extern_desc {
> >                         bool is_signed;
> >                 } kcfg;
> >                 struct {
> > -                       unsigned long long addr;
> > +                       /*
> > +                        *  1. If ksym is typeless, the field 'addr' is valid.
> > +                        *  2. If ksym is typed, the field 'vmlinux_btf_id' is
> > +                        *     valid.
> > +                        */
> > +                       bool is_typeless;
> > +                       union {
> > +                               unsigned long long addr;
> > +                               int vmlinux_btf_id;
> > +                       };
>
> ksym is 16 bytes anyways, union doesn't help to save space. I propose
> to encode all this with just two fields: vmlinux_btf_id and addr. If
> btf_id == 0, then extern is typeless.

Ack on expanding the union. But I slightly preferred keeping
is_typeless. IIUC, btf_id points a VAR_KIND, we need the following
pointer chasing every time

t = btf__type_by_id(obj->btf, ext->btf_id);
t->type;

which I felt is worse than keeping a is_typeless flag.

>
> >                 } ksym;
> >         };
> >  };
> > @@ -382,6 +391,7 @@ struct bpf_object {
> >
> >         bool loaded;
> >         bool has_pseudo_calls;
> > +       bool has_typed_ksyms;
> >
> >         /*
> >          * Information when doing elf related work. Only valid if fd
> > @@ -2521,6 +2531,10 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
> >         if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
> >                 need_vmlinux_btf = true;
> >
> > +       /* Support for typed ksyms needs kernel BTF */
> > +       if (obj->has_typed_ksyms)
> > +               need_vmlinux_btf = true;
>
> On the second read, I don't think you really need `has_typed_ksyms` at
> all. Just iterate over ksym externs and see if you have a typed one.
> It's the only place that cares.
>

Ack. Will iterate over ksym externs here.

> > +
> >         bpf_object__for_each_program(prog, obj) {
> >                 if (!prog->load)
> >                         continue;
> > @@ -2975,10 +2989,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> >                         ext->type = EXT_KSYM;
> >
> >                         vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
> > -                       if (!btf_is_void(vt)) {
> > -                               pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
> > -                               return -ENOTSUP;
> > -                       }
> > +                       ext->ksym.is_typeless = btf_is_void(vt);
> > +
> > +                       if (!obj->has_typed_ksyms && !ext->ksym.is_typeless)
> > +                               obj->has_typed_ksyms = true;
>
> nit: keep it simple:
>
> if (ext->ksym.is_typeless)
>   obj->has_typed_ksyms = true;
>

Ack.

> >                 } else {
> >                         pr_warn("unrecognized extern section '%s'\n", sec_name);
> >                         return -ENOTSUP;
> > @@ -2992,9 +3006,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> >         /* sort externs by type, for kcfg ones also by (align, size, name) */
> >         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
> >
> > -       /* for .ksyms section, we need to turn all externs into allocated
> > -        * variables in BTF to pass kernel verification; we do this by
> > -        * pretending that each extern is a 8-byte variable
> > +       /* for .ksyms section, we need to turn all typeless externs into
> > +        * allocated variables in BTF to pass kernel verification; we do
> > +        * this by pretending that each typeless extern is a 8-byte variable
> >          */
> >         if (ksym_sec) {
> >                 /* find existing 4-byte integer type in BTF to use for fake
> > @@ -3012,7 +3026,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> >
> >                 sec = ksym_sec;
> >                 n = btf_vlen(sec);
> > -               for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
> > +               for (i = 0, off = 0; i < n; i++) {
> >                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
> >                         struct btf_type *vt;
> >
> > @@ -3025,9 +3039,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> >                                 return -ESRCH;
> >                         }
> >                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
> > -                       vt->type = int_btf_id;
> > +                       if (ext->ksym.is_typeless) {
> > +                               vt->type = int_btf_id;
> > +                               vs->size = sizeof(int);
> > +                       }
> >                         vs->offset = off;
> > -                       vs->size = sizeof(int);
> > +                       off += vs->size;
> > +                       pr_debug("ksym var_secinfo: var '%s', type #%d, size %d, offset %d\n",
> > +                                ext->name, vt->type, vs->size, vs->offset);
>
> It's a bit of a waste that we still allocate memory for those typed
> ksym externs, as they don't really need space. But modifying BTF is a
> pain right now, so I think we'll have to do it, until we have a better
> BTF API. But let's make them integers for now to take a fixed and
> small amount of space.
>

Do you mean making typed ksym externs of type integer? If so, we can't
do that, I think. After collect_externs, we later need to compare the
declared extern's type against the type defined in kernel. Better not
rewrite their types in BTf.

I am generally against modifying BTF. I initially didn't notice that
all the ksym externs' types are modified to 'int' and the type
comparison I mentioned above always failed. I dumped the btf in
vmlinux and the btf in object file, checked the kernel variable's
source code, printed out everything I could. The experience was very
bad.

> >                 }
> >                 sec->size = off;
> >         }
> > @@ -5300,8 +5319,13 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
> >                                 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
> >                                 insn[1].imm = ext->kcfg.data_off;
> >                         } else /* EXT_KSYM */ {
> > -                               insn[0].imm = (__u32)ext->ksym.addr;
> > -                               insn[1].imm = ext->ksym.addr >> 32;
> > +                               if (ext->ksym.is_typeless) { /* typelss ksyms */
>
> typo: typeless
>

Ack.

> > +                                       insn[0].imm = (__u32)ext->ksym.addr;
> > +                                       insn[1].imm = ext->ksym.addr >> 32;
> > +                               } else { /* typed ksyms */
> > +                                       insn[0].src_reg = BPF_PSEUDO_BTF_ID;
> > +                                       insn[0].imm = ext->ksym.vmlinux_btf_id;
> > +                               }
> >                         }
> >                         break;
> >                 case RELO_CALL:
> > @@ -6019,6 +6043,10 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
> >                 if (!ext || ext->type != EXT_KSYM)
> >                         continue;
> >
> > +               /* Typed ksyms have the verifier to fill their addresses. */
> > +               if (!ext->ksym.is_typeless)
> > +                       continue;
>
> It might still be a good idea to try to find the symbol in kallsyms
> and emit nicer message if it's not there. Think about the user's
> experience when some global variable is removed from the kernel (or
> compiled out due to missing Kconfig). If libbpf can easily detect
> this, we should do it and provide a good error message.
>

Ack.

> > +
> >                 if (ext->is_set && ext->ksym.addr != sym_addr) {
> >                         pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
> >                                 sym_name, ext->ksym.addr, sym_addr);
> > @@ -6037,10 +6065,72 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
> >         return err;
> >  }
> >
> > +static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
> > +{
> > +       struct extern_desc *ext;
> > +       int i, id;
> > +
> > +       if (!obj->btf_vmlinux) {
> > +               pr_warn("support of typed ksyms needs kernel btf.\n");
> > +               return -ENOENT;
> > +       }
> > +
> > +       for (i = 0; i < obj->nr_extern; i++) {
> > +               const struct btf_type *v, *vx; /* VARs in object and vmlinux btf */
> > +               const struct btf_type *t, *tx; /* TYPEs in btf */
> > +               __u32 vt, vtx; /* btf_ids of TYPEs */
>
> I use targ_ and local_ prefixes with CO-RE to distinguish something
> that's coming from BPF object's BTF vs kernel's BTF, can you please
> adopt the same, as the process is essentially the same. "vx" vs "vtx"
> vs "v" are hard to distinguish and understand.
>

No problem. Will follow the convention.

> > +
> > +               ext = &obj->externs[i];
> > +               if (ext->type != EXT_KSYM)
> > +                       continue;
> > +
> > +               if (ext->ksym.is_typeless)
> > +                       continue;
>
> nit: combine into a single filter condition (we need typed ksym,
> that's one check)
>

Ack.

> > +
> > +               if (ext->is_set) {
> > +                       pr_warn("typed ksym '%s' resolved as typeless ksyms.\n",
> > +                               ext->name);
> > +                       return -EFAULT;
>
> this is a bug if that happens, that should be caught by test, please
> remove unnecessary check
>

Ack.

> > +               }
> > +
> > +               id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
> > +                                           BTF_KIND_VAR);
> > +               if (id <= 0) {
> > +                       pr_warn("no btf entry for ksym '%s' in vmlinux.\n",
> > +                               ext->name);
> > +                       return -ESRCH;
> > +               }
> > +
> > +               vx = btf__type_by_id(obj->btf_vmlinux, id);
> > +               tx = skip_mods_and_typedefs(obj->btf_vmlinux, vx->type, &vtx);
> > +
> > +               v = btf__type_by_id(obj->btf, ext->btf_id);
> > +               t = skip_mods_and_typedefs(obj->btf, v->type, &vt);
> > +
> > +               if (!btf_ksym_type_match(obj->btf_vmlinux, vtx, obj->btf, vt)) {
> > +                       const char *tname, *txname; /* names of TYPEs */
> > +
> > +                       txname = btf__name_by_offset(obj->btf_vmlinux, tx->name_off);
> > +                       tname = btf__name_by_offset(obj->btf, t->name_off);
> > +
> > +                       pr_warn("ksym '%s' expects type '%s' (vmlinux_btf_id: #%d), "
> > +                               "but got '%s' (btf_id: #%d)\n", ext->name,
> > +                               txname, vtx, tname, vt);
> > +                       return -EINVAL;
> > +               }
>
> yeah, definitely just use bpf_core_types_are_compat() here. You'll
> want to skip_mods_and_typedefs first, but everything else should work
> for your use case.
>

Ack. bpf_core_types_are_compat() is indeed a perfect fit here.

[...]
Andrii Nakryiko Sept. 1, 2020, 6:11 p.m. UTC | #3
On Thu, Aug 27, 2020 at 3:29 PM Hao Luo <haoluo@google.com> wrote:
>
> On Fri, Aug 21, 2020 at 3:37 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
> > >
> > > If a ksym is defined with a type, libbpf will try to find the ksym's btf
> > > information from kernel btf. If a valid btf entry for the ksym is found,
> > > libbpf can pass in the found btf id to the verifier, which validates the
> > > ksym's type and value.
> > >
> > > Typeless ksyms (i.e. those defined as 'void') will not have such btf_id,
> > > but it has the symbol's address (read from kallsyms) and its value is
> > > treated as a raw pointer.
> > >
> > > Signed-off-by: Hao Luo <haoluo@google.com>
> > > ---
> > >  tools/lib/bpf/libbpf.c | 130 ++++++++++++++++++++++++++++++++++++-----
> > >  1 file changed, 114 insertions(+), 16 deletions(-)
> > >
> > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > > index 4a81c6b2d21b..94eff612c7c2 100644
> > > --- a/tools/lib/bpf/libbpf.c
> > > +++ b/tools/lib/bpf/libbpf.c
> > > @@ -357,7 +357,16 @@ struct extern_desc {
> > >                         bool is_signed;
> > >                 } kcfg;
> > >                 struct {
> > > -                       unsigned long long addr;
> > > +                       /*
> > > +                        *  1. If ksym is typeless, the field 'addr' is valid.
> > > +                        *  2. If ksym is typed, the field 'vmlinux_btf_id' is
> > > +                        *     valid.
> > > +                        */
> > > +                       bool is_typeless;
> > > +                       union {
> > > +                               unsigned long long addr;
> > > +                               int vmlinux_btf_id;
> > > +                       };
> >
> > ksym is 16 bytes anyways, union doesn't help to save space. I propose
> > to encode all this with just two fields: vmlinux_btf_id and addr. If
> > btf_id == 0, then extern is typeless.
>
> Ack on expanding the union. But I slightly preferred keeping
> is_typeless. IIUC, btf_id points a VAR_KIND, we need the following
> pointer chasing every time
>
> t = btf__type_by_id(obj->btf, ext->btf_id);
> t->type;
>
> which I felt is worse than keeping a is_typeless flag.

Sorry, I'm not following. In all places where you would check
sym->is_typeless, you'd now just do:

if (ext->ksym.vmlinux_btf_id) {
  /* typed, use ext->ksym.vmlinux_btf_id */
} else {
  /* typeless */
}

>
> >
> > >                 } ksym;
> > >         };
> > >  };
> > > @@ -382,6 +391,7 @@ struct bpf_object {
> > >
> > >         bool loaded;
> > >         bool has_pseudo_calls;
> > > +       bool has_typed_ksyms;
> > >
> > >         /*
> > >          * Information when doing elf related work. Only valid if fd
> > > @@ -2521,6 +2531,10 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
> > >         if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
> > >                 need_vmlinux_btf = true;
> > >
> > > +       /* Support for typed ksyms needs kernel BTF */
> > > +       if (obj->has_typed_ksyms)
> > > +               need_vmlinux_btf = true;
> >
> > On the second read, I don't think you really need `has_typed_ksyms` at
> > all. Just iterate over ksym externs and see if you have a typed one.
> > It's the only place that cares.
> >
>
> Ack. Will iterate over ksym externs here.
>
> > > +
> > >         bpf_object__for_each_program(prog, obj) {
> > >                 if (!prog->load)
> > >                         continue;
> > > @@ -2975,10 +2989,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > >                         ext->type = EXT_KSYM;
> > >
> > >                         vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
> > > -                       if (!btf_is_void(vt)) {
> > > -                               pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
> > > -                               return -ENOTSUP;
> > > -                       }
> > > +                       ext->ksym.is_typeless = btf_is_void(vt);
> > > +
> > > +                       if (!obj->has_typed_ksyms && !ext->ksym.is_typeless)
> > > +                               obj->has_typed_ksyms = true;
> >
> > nit: keep it simple:
> >
> > if (ext->ksym.is_typeless)
> >   obj->has_typed_ksyms = true;
> >
>
> Ack.
>
> > >                 } else {
> > >                         pr_warn("unrecognized extern section '%s'\n", sec_name);
> > >                         return -ENOTSUP;
> > > @@ -2992,9 +3006,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > >         /* sort externs by type, for kcfg ones also by (align, size, name) */
> > >         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
> > >
> > > -       /* for .ksyms section, we need to turn all externs into allocated
> > > -        * variables in BTF to pass kernel verification; we do this by
> > > -        * pretending that each extern is a 8-byte variable
> > > +       /* for .ksyms section, we need to turn all typeless externs into
> > > +        * allocated variables in BTF to pass kernel verification; we do
> > > +        * this by pretending that each typeless extern is a 8-byte variable
> > >          */
> > >         if (ksym_sec) {
> > >                 /* find existing 4-byte integer type in BTF to use for fake
> > > @@ -3012,7 +3026,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > >
> > >                 sec = ksym_sec;
> > >                 n = btf_vlen(sec);
> > > -               for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
> > > +               for (i = 0, off = 0; i < n; i++) {
> > >                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
> > >                         struct btf_type *vt;
> > >
> > > @@ -3025,9 +3039,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > >                                 return -ESRCH;
> > >                         }
> > >                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
> > > -                       vt->type = int_btf_id;
> > > +                       if (ext->ksym.is_typeless) {
> > > +                               vt->type = int_btf_id;
> > > +                               vs->size = sizeof(int);
> > > +                       }
> > >                         vs->offset = off;
> > > -                       vs->size = sizeof(int);
> > > +                       off += vs->size;
> > > +                       pr_debug("ksym var_secinfo: var '%s', type #%d, size %d, offset %d\n",
> > > +                                ext->name, vt->type, vs->size, vs->offset);
> >
> > It's a bit of a waste that we still allocate memory for those typed
> > ksym externs, as they don't really need space. But modifying BTF is a
> > pain right now, so I think we'll have to do it, until we have a better
> > BTF API. But let's make them integers for now to take a fixed and
> > small amount of space.
> >
>
> Do you mean making typed ksym externs of type integer? If so, we can't
> do that, I think. After collect_externs, we later need to compare the
> declared extern's type against the type defined in kernel. Better not
> rewrite their types in BTf.

Then maybe we need to make btf_id to point to the actual type of the
variable, not BTF_KIND_VAR? Or just additionally record type's btf_id,
not sure which one makes more sense at the moment.

>
> I am generally against modifying BTF. I initially didn't notice that
> all the ksym externs' types are modified to 'int' and the type
> comparison I mentioned above always failed. I dumped the btf in
> vmlinux and the btf in object file, checked the kernel variable's
> source code, printed out everything I could. The experience was very
> bad.
>

It might be confusing, I agree, but the alternative is just a waste of
memory just to match the BTF definition of a DATASEC, which describes
externs. It seems sloppy to allocate a bunch of unused memory just to
match the kernel's variable size, while in reality we either use 8
bytes used (for typeless externs, storing ksym address) or none (for
typed externs).

Another alternative is to not specify BTF ID for .ksyms map, but it's
not great for typeless externs case, as we are losing all type info
completely. Trade-offs...

[...]

> > > +               }
> > > +
> > > +               id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
> > > +                                           BTF_KIND_VAR);
> > > +               if (id <= 0) {
> > > +                       pr_warn("no btf entry for ksym '%s' in vmlinux.\n",
> > > +                               ext->name);
> > > +                       return -ESRCH;
> > > +               }
> > > +
> > > +               vx = btf__type_by_id(obj->btf_vmlinux, id);
> > > +               tx = skip_mods_and_typedefs(obj->btf_vmlinux, vx->type, &vtx);
> > > +
> > > +               v = btf__type_by_id(obj->btf, ext->btf_id);
> > > +               t = skip_mods_and_typedefs(obj->btf, v->type, &vt);
> > > +
> > > +               if (!btf_ksym_type_match(obj->btf_vmlinux, vtx, obj->btf, vt)) {
> > > +                       const char *tname, *txname; /* names of TYPEs */
> > > +
> > > +                       txname = btf__name_by_offset(obj->btf_vmlinux, tx->name_off);
> > > +                       tname = btf__name_by_offset(obj->btf, t->name_off);
> > > +
> > > +                       pr_warn("ksym '%s' expects type '%s' (vmlinux_btf_id: #%d), "
> > > +                               "but got '%s' (btf_id: #%d)\n", ext->name,
> > > +                               txname, vtx, tname, vt);
> > > +                       return -EINVAL;
> > > +               }
> >
> > yeah, definitely just use bpf_core_types_are_compat() here. You'll
> > want to skip_mods_and_typedefs first, but everything else should work
> > for your use case.
> >
>
> Ack. bpf_core_types_are_compat() is indeed a perfect fit here.
>

ok, great

> [...]
Hao Luo Sept. 1, 2020, 8:35 p.m. UTC | #4
On Tue, Sep 1, 2020 at 11:11 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Thu, Aug 27, 2020 at 3:29 PM Hao Luo <haoluo@google.com> wrote:
> >
> > On Fri, Aug 21, 2020 at 3:37 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
> > > >
> > > > If a ksym is defined with a type, libbpf will try to find the ksym's btf
> > > > information from kernel btf. If a valid btf entry for the ksym is found,
> > > > libbpf can pass in the found btf id to the verifier, which validates the
> > > > ksym's type and value.
> > > >
> > > > Typeless ksyms (i.e. those defined as 'void') will not have such btf_id,
> > > > but it has the symbol's address (read from kallsyms) and its value is
> > > > treated as a raw pointer.
> > > >
> > > > Signed-off-by: Hao Luo <haoluo@google.com>
> > > > ---
> > > >  tools/lib/bpf/libbpf.c | 130 ++++++++++++++++++++++++++++++++++++-----
> > > >  1 file changed, 114 insertions(+), 16 deletions(-)
> > > >
> > > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > > > index 4a81c6b2d21b..94eff612c7c2 100644
> > > > --- a/tools/lib/bpf/libbpf.c
> > > > +++ b/tools/lib/bpf/libbpf.c
> > > > @@ -357,7 +357,16 @@ struct extern_desc {
> > > >                         bool is_signed;
> > > >                 } kcfg;
> > > >                 struct {
> > > > -                       unsigned long long addr;
> > > > +                       /*
> > > > +                        *  1. If ksym is typeless, the field 'addr' is valid.
> > > > +                        *  2. If ksym is typed, the field 'vmlinux_btf_id' is
> > > > +                        *     valid.
> > > > +                        */
> > > > +                       bool is_typeless;
> > > > +                       union {
> > > > +                               unsigned long long addr;
> > > > +                               int vmlinux_btf_id;
> > > > +                       };
> > >
> > > ksym is 16 bytes anyways, union doesn't help to save space. I propose
> > > to encode all this with just two fields: vmlinux_btf_id and addr. If
> > > btf_id == 0, then extern is typeless.
> >
> > Ack on expanding the union. But I slightly preferred keeping
> > is_typeless. IIUC, btf_id points a VAR_KIND, we need the following
> > pointer chasing every time
> >
> > t = btf__type_by_id(obj->btf, ext->btf_id);
> > t->type;
> >
> > which I felt is worse than keeping a is_typeless flag.
>
> Sorry, I'm not following. In all places where you would check
> sym->is_typeless, you'd now just do:
>
> if (ext->ksym.vmlinux_btf_id) {
>   /* typed, use ext->ksym.vmlinux_btf_id */
> } else {
>   /* typeless */
> }
>

My apologies, I should be more specific.

'vmlinux_btf_id' gets its value in bpf_object__resolve_ksyms_btf_id().
Before we call this function, there are three places that need to tell
whether a ksym is typed, currently in v1. Specifically,

 - in bpf_object__collect_externs(), typeless ksyms are rewritten as
'int', in contrast, typed ones are left untouched (though this may
change in v2).
 - bpf_object__load_vmlinux_btf() now is called before
bpf_object__resolve_ksyms_btf_id(). In v1's design, if there is no
typed ksym, we could skip loading vmlinux_btf potentially.
 - even bpf_object__resolve_ksyms_btf_id() itself is conditionally
called, depending on whether there is any typed ksym.

At the time when these places are called, vmlinux_btf_id is
unavailable and we can't use it for the purpose of telling whether a
ksym is typed.

However, rather than vmlinux_btf_id, there may be an alternative. We
can record the ksym extern's type's btf_id and use that as
'is_typeless' flag. This also solves the problem below.

[...]

> > > >                 } else {
> > > >                         pr_warn("unrecognized extern section '%s'\n", sec_name);
> > > >                         return -ENOTSUP;
> > > > @@ -2992,9 +3006,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > > >         /* sort externs by type, for kcfg ones also by (align, size, name) */
> > > >         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
> > > >
> > > > -       /* for .ksyms section, we need to turn all externs into allocated
> > > > -        * variables in BTF to pass kernel verification; we do this by
> > > > -        * pretending that each extern is a 8-byte variable
> > > > +       /* for .ksyms section, we need to turn all typeless externs into
> > > > +        * allocated variables in BTF to pass kernel verification; we do
> > > > +        * this by pretending that each typeless extern is a 8-byte variable
> > > >          */
> > > >         if (ksym_sec) {
> > > >                 /* find existing 4-byte integer type in BTF to use for fake
> > > > @@ -3012,7 +3026,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > > >
> > > >                 sec = ksym_sec;
> > > >                 n = btf_vlen(sec);
> > > > -               for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
> > > > +               for (i = 0, off = 0; i < n; i++) {
> > > >                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
> > > >                         struct btf_type *vt;
> > > >
> > > > @@ -3025,9 +3039,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > > >                                 return -ESRCH;
> > > >                         }
> > > >                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
> > > > -                       vt->type = int_btf_id;
> > > > +                       if (ext->ksym.is_typeless) {
> > > > +                               vt->type = int_btf_id;
> > > > +                               vs->size = sizeof(int);
> > > > +                       }
> > > >                         vs->offset = off;
> > > > -                       vs->size = sizeof(int);
> > > > +                       off += vs->size;
> > > > +                       pr_debug("ksym var_secinfo: var '%s', type #%d, size %d, offset %d\n",
> > > > +                                ext->name, vt->type, vs->size, vs->offset);
> > >
> > > It's a bit of a waste that we still allocate memory for those typed
> > > ksym externs, as they don't really need space. But modifying BTF is a
> > > pain right now, so I think we'll have to do it, until we have a better
> > > BTF API. But let's make them integers for now to take a fixed and
> > > small amount of space.
> > >
> >
> > Do you mean making typed ksym externs of type integer? If so, we can't
> > do that, I think. After collect_externs, we later need to compare the
> > declared extern's type against the type defined in kernel. Better not
> > rewrite their types in BTf.
>
> Then maybe we need to make btf_id to point to the actual type of the
> variable, not BTF_KIND_VAR? Or just additionally record type's btf_id,
> not sure which one makes more sense at the moment.
>
> >
> > I am generally against modifying BTF. I initially didn't notice that
> > all the ksym externs' types are modified to 'int' and the type
> > comparison I mentioned above always failed. I dumped the btf in
> > vmlinux and the btf in object file, checked the kernel variable's
> > source code, printed out everything I could. The experience was very
> > bad.
> >
>
> It might be confusing, I agree, but the alternative is just a waste of
> memory just to match the BTF definition of a DATASEC, which describes
> externs. It seems sloppy to allocate a bunch of unused memory just to
> match the kernel's variable size, while in reality we either use 8
> bytes used (for typeless externs, storing ksym address) or none (for
> typed externs).
>
> Another alternative is to not specify BTF ID for .ksyms map, but it's
> not great for typeless externs case, as we are losing all type info
> completely. Trade-offs...
>

I see. It looks like rewriting all ksym externs' type to integers is
the most straightforward solution here, though I felt a bit hacky.

I can record the btf_id of the var's type before rewriting, so
bpf_core_type_are_compat() can find the true type for comparison. One
good thing about recording the type's btf_id is that it can be used to
tell whether the ksym extern is typed or not, before vmlinux_btf_id
gets its value. I will think about this and try the alternatives a bit
more and follow up if I come up with a better solution.

Thanks!

[...]
Andrii Nakryiko Sept. 1, 2020, 11:54 p.m. UTC | #5
On Tue, Sep 1, 2020 at 1:35 PM Hao Luo <haoluo@google.com> wrote:
>
> On Tue, Sep 1, 2020 at 11:11 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Thu, Aug 27, 2020 at 3:29 PM Hao Luo <haoluo@google.com> wrote:
> > >
> > > On Fri, Aug 21, 2020 at 3:37 PM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
> > > > >
> > > > > If a ksym is defined with a type, libbpf will try to find the ksym's btf
> > > > > information from kernel btf. If a valid btf entry for the ksym is found,
> > > > > libbpf can pass in the found btf id to the verifier, which validates the
> > > > > ksym's type and value.
> > > > >
> > > > > Typeless ksyms (i.e. those defined as 'void') will not have such btf_id,
> > > > > but it has the symbol's address (read from kallsyms) and its value is
> > > > > treated as a raw pointer.
> > > > >
> > > > > Signed-off-by: Hao Luo <haoluo@google.com>
> > > > > ---
> > > > >  tools/lib/bpf/libbpf.c | 130 ++++++++++++++++++++++++++++++++++++-----
> > > > >  1 file changed, 114 insertions(+), 16 deletions(-)
> > > > >
> > > > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > > > > index 4a81c6b2d21b..94eff612c7c2 100644
> > > > > --- a/tools/lib/bpf/libbpf.c
> > > > > +++ b/tools/lib/bpf/libbpf.c
> > > > > @@ -357,7 +357,16 @@ struct extern_desc {
> > > > >                         bool is_signed;
> > > > >                 } kcfg;
> > > > >                 struct {
> > > > > -                       unsigned long long addr;
> > > > > +                       /*
> > > > > +                        *  1. If ksym is typeless, the field 'addr' is valid.
> > > > > +                        *  2. If ksym is typed, the field 'vmlinux_btf_id' is
> > > > > +                        *     valid.
> > > > > +                        */
> > > > > +                       bool is_typeless;
> > > > > +                       union {
> > > > > +                               unsigned long long addr;
> > > > > +                               int vmlinux_btf_id;
> > > > > +                       };
> > > >
> > > > ksym is 16 bytes anyways, union doesn't help to save space. I propose
> > > > to encode all this with just two fields: vmlinux_btf_id and addr. If
> > > > btf_id == 0, then extern is typeless.
> > >
> > > Ack on expanding the union. But I slightly preferred keeping
> > > is_typeless. IIUC, btf_id points a VAR_KIND, we need the following
> > > pointer chasing every time
> > >
> > > t = btf__type_by_id(obj->btf, ext->btf_id);
> > > t->type;
> > >
> > > which I felt is worse than keeping a is_typeless flag.
> >
> > Sorry, I'm not following. In all places where you would check
> > sym->is_typeless, you'd now just do:
> >
> > if (ext->ksym.vmlinux_btf_id) {
> >   /* typed, use ext->ksym.vmlinux_btf_id */
> > } else {
> >   /* typeless */
> > }
> >
>
> My apologies, I should be more specific.
>
> 'vmlinux_btf_id' gets its value in bpf_object__resolve_ksyms_btf_id().
> Before we call this function, there are three places that need to tell
> whether a ksym is typed, currently in v1. Specifically,
>
>  - in bpf_object__collect_externs(), typeless ksyms are rewritten as
> 'int', in contrast, typed ones are left untouched (though this may
> change in v2).
>  - bpf_object__load_vmlinux_btf() now is called before
> bpf_object__resolve_ksyms_btf_id(). In v1's design, if there is no
> typed ksym, we could skip loading vmlinux_btf potentially.
>  - even bpf_object__resolve_ksyms_btf_id() itself is conditionally
> called, depending on whether there is any typed ksym.
>
> At the time when these places are called, vmlinux_btf_id is
> unavailable and we can't use it for the purpose of telling whether a
> ksym is typed.
>
> However, rather than vmlinux_btf_id, there may be an alternative. We
> can record the ksym extern's type's btf_id and use that as
> 'is_typeless' flag. This also solves the problem below.

Oh, I was thinking that vmlinux_btf_id contains a local BTF ID this
whole time (clearly ignoring the "vmlinux_" part).

>
> [...]
>
> > > > >                 } else {
> > > > >                         pr_warn("unrecognized extern section '%s'\n", sec_name);
> > > > >                         return -ENOTSUP;
> > > > > @@ -2992,9 +3006,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > > > >         /* sort externs by type, for kcfg ones also by (align, size, name) */
> > > > >         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
> > > > >
> > > > > -       /* for .ksyms section, we need to turn all externs into allocated
> > > > > -        * variables in BTF to pass kernel verification; we do this by
> > > > > -        * pretending that each extern is a 8-byte variable
> > > > > +       /* for .ksyms section, we need to turn all typeless externs into
> > > > > +        * allocated variables in BTF to pass kernel verification; we do
> > > > > +        * this by pretending that each typeless extern is a 8-byte variable
> > > > >          */
> > > > >         if (ksym_sec) {
> > > > >                 /* find existing 4-byte integer type in BTF to use for fake
> > > > > @@ -3012,7 +3026,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > > > >
> > > > >                 sec = ksym_sec;
> > > > >                 n = btf_vlen(sec);
> > > > > -               for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
> > > > > +               for (i = 0, off = 0; i < n; i++) {
> > > > >                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
> > > > >                         struct btf_type *vt;
> > > > >
> > > > > @@ -3025,9 +3039,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > > > >                                 return -ESRCH;
> > > > >                         }
> > > > >                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
> > > > > -                       vt->type = int_btf_id;
> > > > > +                       if (ext->ksym.is_typeless) {
> > > > > +                               vt->type = int_btf_id;
> > > > > +                               vs->size = sizeof(int);
> > > > > +                       }
> > > > >                         vs->offset = off;
> > > > > -                       vs->size = sizeof(int);
> > > > > +                       off += vs->size;
> > > > > +                       pr_debug("ksym var_secinfo: var '%s', type #%d, size %d, offset %d\n",
> > > > > +                                ext->name, vt->type, vs->size, vs->offset);
> > > >
> > > > It's a bit of a waste that we still allocate memory for those typed
> > > > ksym externs, as they don't really need space. But modifying BTF is a
> > > > pain right now, so I think we'll have to do it, until we have a better
> > > > BTF API. But let's make them integers for now to take a fixed and
> > > > small amount of space.
> > > >
> > >
> > > Do you mean making typed ksym externs of type integer? If so, we can't
> > > do that, I think. After collect_externs, we later need to compare the
> > > declared extern's type against the type defined in kernel. Better not
> > > rewrite their types in BTf.
> >
> > Then maybe we need to make btf_id to point to the actual type of the
> > variable, not BTF_KIND_VAR? Or just additionally record type's btf_id,
> > not sure which one makes more sense at the moment.
> >
> > >
> > > I am generally against modifying BTF. I initially didn't notice that
> > > all the ksym externs' types are modified to 'int' and the type
> > > comparison I mentioned above always failed. I dumped the btf in
> > > vmlinux and the btf in object file, checked the kernel variable's
> > > source code, printed out everything I could. The experience was very
> > > bad.
> > >
> >
> > It might be confusing, I agree, but the alternative is just a waste of
> > memory just to match the BTF definition of a DATASEC, which describes
> > externs. It seems sloppy to allocate a bunch of unused memory just to
> > match the kernel's variable size, while in reality we either use 8
> > bytes used (for typeless externs, storing ksym address) or none (for
> > typed externs).
> >
> > Another alternative is to not specify BTF ID for .ksyms map, but it's
> > not great for typeless externs case, as we are losing all type info
> > completely. Trade-offs...
> >
>
> I see. It looks like rewriting all ksym externs' type to integers is
> the most straightforward solution here, though I felt a bit hacky.
>
> I can record the btf_id of the var's type before rewriting, so
> bpf_core_type_are_compat() can find the true type for comparison. One
> good thing about recording the type's btf_id is that it can be used to
> tell whether the ksym extern is typed or not, before vmlinux_btf_id

that's what I've been getting at, but I missed that vmlinux_btf_id is
kernel BTF type ID. So let's record both local and target BTF type IDs
and use local_btf_id as an indicator of typed vs typeless?

> gets its value. I will think about this and try the alternatives a bit
> more and follow up if I come up with a better solution.
>
> Thanks!
>
> [...]
Hao Luo Sept. 2, 2020, 12:46 a.m. UTC | #6
On Tue, Sep 1, 2020 at 4:55 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Sep 1, 2020 at 1:35 PM Hao Luo <haoluo@google.com> wrote:
> >
> > On Tue, Sep 1, 2020 at 11:11 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Thu, Aug 27, 2020 at 3:29 PM Hao Luo <haoluo@google.com> wrote:
> > > >
> > > > On Fri, Aug 21, 2020 at 3:37 PM Andrii Nakryiko
> > > > <andrii.nakryiko@gmail.com> wrote:
> > > > >
> > > > > On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
> > > > > >
> > > > > > If a ksym is defined with a type, libbpf will try to find the ksym's btf
> > > > > > information from kernel btf. If a valid btf entry for the ksym is found,
> > > > > > libbpf can pass in the found btf id to the verifier, which validates the
> > > > > > ksym's type and value.
> > > > > >
> > > > > > Typeless ksyms (i.e. those defined as 'void') will not have such btf_id,
> > > > > > but it has the symbol's address (read from kallsyms) and its value is
> > > > > > treated as a raw pointer.
> > > > > >
> > > > > > Signed-off-by: Hao Luo <haoluo@google.com>
> > > > > > ---
> > > > > >  tools/lib/bpf/libbpf.c | 130 ++++++++++++++++++++++++++++++++++++-----
> > > > > >  1 file changed, 114 insertions(+), 16 deletions(-)
> > > > > >
> > > > > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > > > > > index 4a81c6b2d21b..94eff612c7c2 100644
> > > > > > --- a/tools/lib/bpf/libbpf.c
> > > > > > +++ b/tools/lib/bpf/libbpf.c
> > > > > > @@ -357,7 +357,16 @@ struct extern_desc {
> > > > > >                         bool is_signed;
> > > > > >                 } kcfg;
> > > > > >                 struct {
> > > > > > -                       unsigned long long addr;
> > > > > > +                       /*
> > > > > > +                        *  1. If ksym is typeless, the field 'addr' is valid.
> > > > > > +                        *  2. If ksym is typed, the field 'vmlinux_btf_id' is
> > > > > > +                        *     valid.
> > > > > > +                        */
> > > > > > +                       bool is_typeless;
> > > > > > +                       union {
> > > > > > +                               unsigned long long addr;
> > > > > > +                               int vmlinux_btf_id;
> > > > > > +                       };
> > > > >
> > > > > ksym is 16 bytes anyways, union doesn't help to save space. I propose
> > > > > to encode all this with just two fields: vmlinux_btf_id and addr. If
> > > > > btf_id == 0, then extern is typeless.
> > > >
> > > > Ack on expanding the union. But I slightly preferred keeping
> > > > is_typeless. IIUC, btf_id points a VAR_KIND, we need the following
> > > > pointer chasing every time
> > > >
> > > > t = btf__type_by_id(obj->btf, ext->btf_id);
> > > > t->type;
> > > >
> > > > which I felt is worse than keeping a is_typeless flag.
> > >
> > > Sorry, I'm not following. In all places where you would check
> > > sym->is_typeless, you'd now just do:
> > >
> > > if (ext->ksym.vmlinux_btf_id) {
> > >   /* typed, use ext->ksym.vmlinux_btf_id */
> > > } else {
> > >   /* typeless */
> > > }
> > >
> >
> > My apologies, I should be more specific.
> >
> > 'vmlinux_btf_id' gets its value in bpf_object__resolve_ksyms_btf_id().
> > Before we call this function, there are three places that need to tell
> > whether a ksym is typed, currently in v1. Specifically,
> >
> >  - in bpf_object__collect_externs(), typeless ksyms are rewritten as
> > 'int', in contrast, typed ones are left untouched (though this may
> > change in v2).
> >  - bpf_object__load_vmlinux_btf() now is called before
> > bpf_object__resolve_ksyms_btf_id(). In v1's design, if there is no
> > typed ksym, we could skip loading vmlinux_btf potentially.
> >  - even bpf_object__resolve_ksyms_btf_id() itself is conditionally
> > called, depending on whether there is any typed ksym.
> >
> > At the time when these places are called, vmlinux_btf_id is
> > unavailable and we can't use it for the purpose of telling whether a
> > ksym is typed.
> >
> > However, rather than vmlinux_btf_id, there may be an alternative. We
> > can record the ksym extern's type's btf_id and use that as
> > 'is_typeless' flag. This also solves the problem below.
>
> Oh, I was thinking that vmlinux_btf_id contains a local BTF ID this
> whole time (clearly ignoring the "vmlinux_" part).
>
> >
> > [...]
> >
> > > > > >                 } else {
> > > > > >                         pr_warn("unrecognized extern section '%s'\n", sec_name);
> > > > > >                         return -ENOTSUP;
> > > > > > @@ -2992,9 +3006,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > > > > >         /* sort externs by type, for kcfg ones also by (align, size, name) */
> > > > > >         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
> > > > > >
> > > > > > -       /* for .ksyms section, we need to turn all externs into allocated
> > > > > > -        * variables in BTF to pass kernel verification; we do this by
> > > > > > -        * pretending that each extern is a 8-byte variable
> > > > > > +       /* for .ksyms section, we need to turn all typeless externs into
> > > > > > +        * allocated variables in BTF to pass kernel verification; we do
> > > > > > +        * this by pretending that each typeless extern is a 8-byte variable
> > > > > >          */
> > > > > >         if (ksym_sec) {
> > > > > >                 /* find existing 4-byte integer type in BTF to use for fake
> > > > > > @@ -3012,7 +3026,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > > > > >
> > > > > >                 sec = ksym_sec;
> > > > > >                 n = btf_vlen(sec);
> > > > > > -               for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
> > > > > > +               for (i = 0, off = 0; i < n; i++) {
> > > > > >                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
> > > > > >                         struct btf_type *vt;
> > > > > >
> > > > > > @@ -3025,9 +3039,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
> > > > > >                                 return -ESRCH;
> > > > > >                         }
> > > > > >                         btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
> > > > > > -                       vt->type = int_btf_id;
> > > > > > +                       if (ext->ksym.is_typeless) {
> > > > > > +                               vt->type = int_btf_id;
> > > > > > +                               vs->size = sizeof(int);
> > > > > > +                       }
> > > > > >                         vs->offset = off;
> > > > > > -                       vs->size = sizeof(int);
> > > > > > +                       off += vs->size;
> > > > > > +                       pr_debug("ksym var_secinfo: var '%s', type #%d, size %d, offset %d\n",
> > > > > > +                                ext->name, vt->type, vs->size, vs->offset);
> > > > >
> > > > > It's a bit of a waste that we still allocate memory for those typed
> > > > > ksym externs, as they don't really need space. But modifying BTF is a
> > > > > pain right now, so I think we'll have to do it, until we have a better
> > > > > BTF API. But let's make them integers for now to take a fixed and
> > > > > small amount of space.
> > > > >
> > > >
> > > > Do you mean making typed ksym externs of type integer? If so, we can't
> > > > do that, I think. After collect_externs, we later need to compare the
> > > > declared extern's type against the type defined in kernel. Better not
> > > > rewrite their types in BTf.
> > >
> > > Then maybe we need to make btf_id to point to the actual type of the
> > > variable, not BTF_KIND_VAR? Or just additionally record type's btf_id,
> > > not sure which one makes more sense at the moment.
> > >
> > > >
> > > > I am generally against modifying BTF. I initially didn't notice that
> > > > all the ksym externs' types are modified to 'int' and the type
> > > > comparison I mentioned above always failed. I dumped the btf in
> > > > vmlinux and the btf in object file, checked the kernel variable's
> > > > source code, printed out everything I could. The experience was very
> > > > bad.
> > > >
> > >
> > > It might be confusing, I agree, but the alternative is just a waste of
> > > memory just to match the BTF definition of a DATASEC, which describes
> > > externs. It seems sloppy to allocate a bunch of unused memory just to
> > > match the kernel's variable size, while in reality we either use 8
> > > bytes used (for typeless externs, storing ksym address) or none (for
> > > typed externs).
> > >
> > > Another alternative is to not specify BTF ID for .ksyms map, but it's
> > > not great for typeless externs case, as we are losing all type info
> > > completely. Trade-offs...
> > >
> >
> > I see. It looks like rewriting all ksym externs' type to integers is
> > the most straightforward solution here, though I felt a bit hacky.
> >
> > I can record the btf_id of the var's type before rewriting, so
> > bpf_core_type_are_compat() can find the true type for comparison. One
> > good thing about recording the type's btf_id is that it can be used to
> > tell whether the ksym extern is typed or not, before vmlinux_btf_id
>
> that's what I've been getting at, but I missed that vmlinux_btf_id is
> kernel BTF type ID. So let's record both local and target BTF type IDs
> and use local_btf_id as an indicator of typed vs typeless?
>

Yup, that sounds great!

[...]
diff mbox series

Patch

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4a81c6b2d21b..94eff612c7c2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -357,7 +357,16 @@  struct extern_desc {
 			bool is_signed;
 		} kcfg;
 		struct {
-			unsigned long long addr;
+			/*
+			 *  1. If ksym is typeless, the field 'addr' is valid.
+			 *  2. If ksym is typed, the field 'vmlinux_btf_id' is
+			 *     valid.
+			 */
+			bool is_typeless;
+			union {
+				unsigned long long addr;
+				int vmlinux_btf_id;
+			};
 		} ksym;
 	};
 };
@@ -382,6 +391,7 @@  struct bpf_object {
 
 	bool loaded;
 	bool has_pseudo_calls;
+	bool has_typed_ksyms;
 
 	/*
 	 * Information when doing elf related work. Only valid if fd
@@ -2521,6 +2531,10 @@  static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
 	if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
 		need_vmlinux_btf = true;
 
+	/* Support for typed ksyms needs kernel BTF */
+	if (obj->has_typed_ksyms)
+		need_vmlinux_btf = true;
+
 	bpf_object__for_each_program(prog, obj) {
 		if (!prog->load)
 			continue;
@@ -2975,10 +2989,10 @@  static int bpf_object__collect_externs(struct bpf_object *obj)
 			ext->type = EXT_KSYM;
 
 			vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
-			if (!btf_is_void(vt)) {
-				pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
-				return -ENOTSUP;
-			}
+			ext->ksym.is_typeless = btf_is_void(vt);
+
+			if (!obj->has_typed_ksyms && !ext->ksym.is_typeless)
+				obj->has_typed_ksyms = true;
 		} else {
 			pr_warn("unrecognized extern section '%s'\n", sec_name);
 			return -ENOTSUP;
@@ -2992,9 +3006,9 @@  static int bpf_object__collect_externs(struct bpf_object *obj)
 	/* sort externs by type, for kcfg ones also by (align, size, name) */
 	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
 
-	/* for .ksyms section, we need to turn all externs into allocated
-	 * variables in BTF to pass kernel verification; we do this by
-	 * pretending that each extern is a 8-byte variable
+	/* for .ksyms section, we need to turn all typeless externs into
+	 * allocated variables in BTF to pass kernel verification; we do
+	 * this by pretending that each typeless extern is a 8-byte variable
 	 */
 	if (ksym_sec) {
 		/* find existing 4-byte integer type in BTF to use for fake
@@ -3012,7 +3026,7 @@  static int bpf_object__collect_externs(struct bpf_object *obj)
 
 		sec = ksym_sec;
 		n = btf_vlen(sec);
-		for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
+		for (i = 0, off = 0; i < n; i++) {
 			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
 			struct btf_type *vt;
 
@@ -3025,9 +3039,14 @@  static int bpf_object__collect_externs(struct bpf_object *obj)
 				return -ESRCH;
 			}
 			btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
-			vt->type = int_btf_id;
+			if (ext->ksym.is_typeless) {
+				vt->type = int_btf_id;
+				vs->size = sizeof(int);
+			}
 			vs->offset = off;
-			vs->size = sizeof(int);
+			off += vs->size;
+			pr_debug("ksym var_secinfo: var '%s', type #%d, size %d, offset %d\n",
+				 ext->name, vt->type, vs->size, vs->offset);
 		}
 		sec->size = off;
 	}
@@ -5300,8 +5319,13 @@  bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 				insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
 				insn[1].imm = ext->kcfg.data_off;
 			} else /* EXT_KSYM */ {
-				insn[0].imm = (__u32)ext->ksym.addr;
-				insn[1].imm = ext->ksym.addr >> 32;
+				if (ext->ksym.is_typeless) { /* typelss ksyms */
+					insn[0].imm = (__u32)ext->ksym.addr;
+					insn[1].imm = ext->ksym.addr >> 32;
+				} else { /* typed ksyms */
+					insn[0].src_reg = BPF_PSEUDO_BTF_ID;
+					insn[0].imm = ext->ksym.vmlinux_btf_id;
+				}
 			}
 			break;
 		case RELO_CALL:
@@ -6019,6 +6043,10 @@  static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
 		if (!ext || ext->type != EXT_KSYM)
 			continue;
 
+		/* Typed ksyms have the verifier to fill their addresses. */
+		if (!ext->ksym.is_typeless)
+			continue;
+
 		if (ext->is_set && ext->ksym.addr != sym_addr) {
 			pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
 				sym_name, ext->ksym.addr, sym_addr);
@@ -6037,10 +6065,72 @@  static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
 	return err;
 }
 
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+{
+	struct extern_desc *ext;
+	int i, id;
+
+	if (!obj->btf_vmlinux) {
+		pr_warn("support of typed ksyms needs kernel btf.\n");
+		return -ENOENT;
+	}
+
+	for (i = 0; i < obj->nr_extern; i++) {
+		const struct btf_type *v, *vx; /* VARs in object and vmlinux btf */
+		const struct btf_type *t, *tx; /* TYPEs in btf */
+		__u32 vt, vtx; /* btf_ids of TYPEs */
+
+		ext = &obj->externs[i];
+		if (ext->type != EXT_KSYM)
+			continue;
+
+		if (ext->ksym.is_typeless)
+			continue;
+
+		if (ext->is_set) {
+			pr_warn("typed ksym '%s' resolved as typeless ksyms.\n",
+				ext->name);
+			return -EFAULT;
+		}
+
+		id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
+					    BTF_KIND_VAR);
+		if (id <= 0) {
+			pr_warn("no btf entry for ksym '%s' in vmlinux.\n",
+				ext->name);
+			return -ESRCH;
+		}
+
+		vx = btf__type_by_id(obj->btf_vmlinux, id);
+		tx = skip_mods_and_typedefs(obj->btf_vmlinux, vx->type, &vtx);
+
+		v = btf__type_by_id(obj->btf, ext->btf_id);
+		t = skip_mods_and_typedefs(obj->btf, v->type, &vt);
+
+		if (!btf_ksym_type_match(obj->btf_vmlinux, vtx, obj->btf, vt)) {
+			const char *tname, *txname; /* names of TYPEs */
+
+			txname = btf__name_by_offset(obj->btf_vmlinux, tx->name_off);
+			tname = btf__name_by_offset(obj->btf, t->name_off);
+
+			pr_warn("ksym '%s' expects type '%s' (vmlinux_btf_id: #%d), "
+				"but got '%s' (btf_id: #%d)\n", ext->name,
+				txname, vtx, tname, vt);
+			return -EINVAL;
+		}
+
+		ext->is_set = true;
+		ext->ksym.vmlinux_btf_id = id;
+		pr_debug("extern (ksym) %s=vmlinux_btf_id(#%d)\n", ext->name, id);
+	}
+	return 0;
+}
+
 static int bpf_object__resolve_externs(struct bpf_object *obj,
 				       const char *extra_kconfig)
 {
-	bool need_config = false, need_kallsyms = false;
+	bool need_config = false;
+	bool need_kallsyms = false, need_vmlinux_btf = false;
 	struct extern_desc *ext;
 	void *kcfg_data = NULL;
 	int err, i;
@@ -6071,7 +6161,10 @@  static int bpf_object__resolve_externs(struct bpf_object *obj,
 			   strncmp(ext->name, "CONFIG_", 7) == 0) {
 			need_config = true;
 		} else if (ext->type == EXT_KSYM) {
-			need_kallsyms = true;
+			if (ext->ksym.is_typeless)
+				need_kallsyms = true;
+			else
+				need_vmlinux_btf = true;
 		} else {
 			pr_warn("unrecognized extern '%s'\n", ext->name);
 			return -EINVAL;
@@ -6100,6 +6193,11 @@  static int bpf_object__resolve_externs(struct bpf_object *obj,
 		if (err)
 			return -EINVAL;
 	}
+	if (need_vmlinux_btf) {
+		err = bpf_object__resolve_ksyms_btf_id(obj);
+		if (err)
+			return -EINVAL;
+	}
 	for (i = 0; i < obj->nr_extern; i++) {
 		ext = &obj->externs[i];
 
@@ -6132,10 +6230,10 @@  int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	}
 
 	err = bpf_object__probe_loading(obj);
+	err = err ? : bpf_object__load_vmlinux_btf(obj);
 	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
 	err = err ? : bpf_object__sanitize_and_load_btf(obj);
 	err = err ? : bpf_object__sanitize_maps(obj);
-	err = err ? : bpf_object__load_vmlinux_btf(obj);
 	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
 	err = err ? : bpf_object__create_maps(obj);
 	err = err ? : bpf_object__relocate(obj, attr->target_btf_path);