Message ID | 20200903223332.881541-5-haoluo@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | bpf: BTF support for ksyms | expand |
On Thu, Sep 3, 2020 at 3:35 PM Hao Luo <haoluo@google.com> wrote: > > Add bpf_per_cpu_ptr() to help bpf programs access percpu vars. > bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel > except that it may return NULL. This happens when the cpu parameter is > out of range. So the caller must check the returned value. > > Acked-by: Andrii Nakryiko <andriin@fb.com> > Signed-off-by: Hao Luo <haoluo@google.com> > --- > include/linux/bpf.h | 3 ++ > include/linux/btf.h | 11 ++++++ > include/uapi/linux/bpf.h | 17 +++++++++ > kernel/bpf/btf.c | 10 ------ > kernel/bpf/verifier.c | 66 +++++++++++++++++++++++++++++++--- > kernel/trace/bpf_trace.c | 18 ++++++++++ > tools/include/uapi/linux/bpf.h | 17 +++++++++ > 7 files changed, 128 insertions(+), 14 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index c6d9f2c444f4..6b2034f7665e 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -292,6 +292,7 @@ enum bpf_arg_type { > ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ > ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ > ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ > + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ > }; > > /* type of values returned from helper functions */ > @@ -305,6 +306,7 @@ enum bpf_return_type { > RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ > RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ > RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ > + RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ > }; > > /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs > @@ -385,6 +387,7 @@ enum bpf_reg_type { > PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ > PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ > PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ > + PTR_TO_PERCPU_BTF_ID, /* reg points to percpu kernel type */ > }; > > /* The information passed from prog-specific *_is_valid_access > diff --git a/include/linux/btf.h b/include/linux/btf.h > index 592373d359b9..07b7de1c05b0 100644 > --- a/include/linux/btf.h > +++ b/include/linux/btf.h > @@ -71,6 +71,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, > i < btf_type_vlen(struct_type); \ > i++, member++) > > +#define for_each_vsi(i, struct_type, member) \ datasec_type? > + for (i = 0, member = btf_type_var_secinfo(struct_type); \ > + i < btf_type_vlen(struct_type); \ > + i++, member++) > + > static inline bool btf_type_is_ptr(const struct btf_type *t) > { > return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; > @@ -155,6 +160,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t) > return (const struct btf_member *)(t + 1); > } > > +static inline const struct btf_var_secinfo *btf_type_var_secinfo( > + const struct btf_type *t) > +{ > + return (const struct btf_var_secinfo *)(t + 1); > +} > + > #ifdef CONFIG_BPF_SYSCALL > const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); > const char *btf_name_by_offset(const struct btf *btf, u32 offset); > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index ab00ad9b32e5..d0ec94d5bdbf 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3596,6 +3596,22 @@ union bpf_attr { > * the data in *dst*. This is a wrapper of copy_from_user(). > * Return > * 0 on success, or a negative error in case of failure. > + * > + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) > + * Description > + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a > + * pointer to the percpu kernel variable on *cpu*. A ksym is an > + * extern variable decorated with '__ksym'. For ksym, there is a > + * global var (either static or global) defined of the same name > + * in the kernel. The ksym is percpu if the global var is percpu. > + * The returned pointer points to the global percpu var on *cpu*. > + * > + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the > + * kernel, except that bpf_per_cpu_ptr() may return NULL. This > + * happens if *cpu* is larger than nr_cpu_ids. The caller of > + * bpf_per_cpu_ptr() must check the returned value. > + * Return > + * A generic pointer pointing to the kernel percpu variable on *cpu*. Or NULL, if *cpu* is invalid. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -3747,6 +3763,7 @@ union bpf_attr { > FN(inode_storage_delete), \ > FN(d_path), \ > FN(copy_from_user), \ > + FN(bpf_per_cpu_ptr), \ > /* */ > [...] > @@ -4003,6 +4008,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, > if (type != expected_type) > goto err_type; > } > + } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) { > + expected_type = PTR_TO_PERCPU_BTF_ID; > + if (type != expected_type) > + goto err_type; > + if (!reg->btf_id) { > + verbose(env, "Helper has invalid btf_id in R%d\n", regno); > + return -EACCES; > + } > + meta->ret_btf_id = reg->btf_id; > } else if (arg_type == ARG_PTR_TO_BTF_ID) { > bool ids_match = false; > > @@ -5002,6 +5016,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn > regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; > regs[BPF_REG_0].id = ++env->id_gen; > regs[BPF_REG_0].mem_size = meta.mem_size; > + } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) { Given this is internal implementation detail, this return type is fine, but I'm wondering if it would be better to just make PTR_TO_BTF_ID to allow not just structs? E.g., if we have an int, just allow reading those 4 bytes. Not sure what the implications are in terms of implementation, but conceptually that shouldn't be a problem, given we do have BTF type ID describing size and all. > + const struct btf_type *t; > + > + mark_reg_known_zero(env, regs, BPF_REG_0); > + t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL); > + if (!btf_type_is_struct(t)) { > + u32 tsize; > + const struct btf_type *ret; > + const char *tname; > + > + /* resolve the type size of ksym. */ > + ret = btf_resolve_size(btf_vmlinux, t, &tsize); > + if (IS_ERR(ret)) { > + tname = btf_name_by_offset(btf_vmlinux, t->name_off); > + verbose(env, "unable to resolve the size of type '%s': %ld\n", > + tname, PTR_ERR(ret)); > + return -EINVAL; > + } > + regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; > + regs[BPF_REG_0].mem_size = tsize; > + } else { > + regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; > + regs[BPF_REG_0].btf_id = meta.ret_btf_id; > + } > } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { > int ret_btf_id; > [...] > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index b2a5380eb187..d474c1530f87 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -1144,6 +1144,22 @@ static const struct bpf_func_proto bpf_d_path_proto = { > .allowed = bpf_d_path_allowed, > }; > > +BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) > +{ > + if (cpu >= nr_cpu_ids) > + return 0; > + > + return (u64)per_cpu_ptr(ptr, cpu); not sure, but on 32-bit arches this might cause compilation warning, case to (unsigned long) instead? > +} > + > +static const struct bpf_func_proto bpf_per_cpu_ptr_proto = { > + .func = bpf_per_cpu_ptr, > + .gpl_only = false, > + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, > + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, > + .arg2_type = ARG_ANYTHING, > +}; > + [...]
Thanks for review, Andrii. One question, should I add bpf_{per, this}_cpu_ptr() to the bpf_base_func_proto() in kernel/bpf/helpers.c? On Fri, Sep 4, 2020 at 1:04 PM Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Thu, Sep 3, 2020 at 3:35 PM Hao Luo <haoluo@google.com> wrote: > > > > Add bpf_per_cpu_ptr() to help bpf programs access percpu vars. > > bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel > > except that it may return NULL. This happens when the cpu parameter is > > out of range. So the caller must check the returned value. > > > > Acked-by: Andrii Nakryiko <andriin@fb.com> > > Signed-off-by: Hao Luo <haoluo@google.com> > > --- > > include/linux/bpf.h | 3 ++ > > include/linux/btf.h | 11 ++++++ > > include/uapi/linux/bpf.h | 17 +++++++++ > > kernel/bpf/btf.c | 10 ------ > > kernel/bpf/verifier.c | 66 +++++++++++++++++++++++++++++++--- > > kernel/trace/bpf_trace.c | 18 ++++++++++ > > tools/include/uapi/linux/bpf.h | 17 +++++++++ > > 7 files changed, 128 insertions(+), 14 deletions(-) > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > > index c6d9f2c444f4..6b2034f7665e 100644 > > --- a/include/linux/bpf.h > > +++ b/include/linux/bpf.h > > @@ -292,6 +292,7 @@ enum bpf_arg_type { > > ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ > > ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ > > ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ > > + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ > > }; > > > > /* type of values returned from helper functions */ > > @@ -305,6 +306,7 @@ enum bpf_return_type { > > RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ > > RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ > > RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ > > + RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ > > }; > > > > /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs > > @@ -385,6 +387,7 @@ enum bpf_reg_type { > > PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ > > PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ > > PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ > > + PTR_TO_PERCPU_BTF_ID, /* reg points to percpu kernel type */ > > }; > > > > /* The information passed from prog-specific *_is_valid_access > > diff --git a/include/linux/btf.h b/include/linux/btf.h > > index 592373d359b9..07b7de1c05b0 100644 > > --- a/include/linux/btf.h > > +++ b/include/linux/btf.h > > @@ -71,6 +71,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, > > i < btf_type_vlen(struct_type); \ > > i++, member++) > > > > +#define for_each_vsi(i, struct_type, member) \ > > datasec_type? > Hmmm, right. It seems to come when copy-pasted from "for_each_member". > > + for (i = 0, member = btf_type_var_secinfo(struct_type); \ > > + i < btf_type_vlen(struct_type); \ > > + i++, member++) > > + > > static inline bool btf_type_is_ptr(const struct btf_type *t) > > { > > return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; > > @@ -155,6 +160,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t) > > return (const struct btf_member *)(t + 1); > > } > > > > +static inline const struct btf_var_secinfo *btf_type_var_secinfo( > > + const struct btf_type *t) > > +{ > > + return (const struct btf_var_secinfo *)(t + 1); > > +} > > + > > #ifdef CONFIG_BPF_SYSCALL > > const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); > > const char *btf_name_by_offset(const struct btf *btf, u32 offset); > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index ab00ad9b32e5..d0ec94d5bdbf 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -3596,6 +3596,22 @@ union bpf_attr { > > * the data in *dst*. This is a wrapper of copy_from_user(). > > * Return > > * 0 on success, or a negative error in case of failure. > > + * > > + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) > > + * Description > > + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a > > + * pointer to the percpu kernel variable on *cpu*. A ksym is an > > + * extern variable decorated with '__ksym'. For ksym, there is a > > + * global var (either static or global) defined of the same name > > + * in the kernel. The ksym is percpu if the global var is percpu. > > + * The returned pointer points to the global percpu var on *cpu*. > > + * > > + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the > > + * kernel, except that bpf_per_cpu_ptr() may return NULL. This > > + * happens if *cpu* is larger than nr_cpu_ids. The caller of > > + * bpf_per_cpu_ptr() must check the returned value. > > + * Return > > + * A generic pointer pointing to the kernel percpu variable on *cpu*. > > Or NULL, if *cpu* is invalid. > Ack. > > */ > > #define __BPF_FUNC_MAPPER(FN) \ > > FN(unspec), \ > > @@ -3747,6 +3763,7 @@ union bpf_attr { > > FN(inode_storage_delete), \ > > FN(d_path), \ > > FN(copy_from_user), \ > > + FN(bpf_per_cpu_ptr), \ > > /* */ > > > > [...] > > > @@ -4003,6 +4008,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, > > if (type != expected_type) > > goto err_type; > > } > > + } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) { > > + expected_type = PTR_TO_PERCPU_BTF_ID; > > + if (type != expected_type) > > + goto err_type; > > + if (!reg->btf_id) { > > + verbose(env, "Helper has invalid btf_id in R%d\n", regno); > > + return -EACCES; > > + } > > + meta->ret_btf_id = reg->btf_id; > > } else if (arg_type == ARG_PTR_TO_BTF_ID) { > > bool ids_match = false; > > > > @@ -5002,6 +5016,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn > > regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; > > regs[BPF_REG_0].id = ++env->id_gen; > > regs[BPF_REG_0].mem_size = meta.mem_size; > > + } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) { > > Given this is internal implementation detail, this return type is > fine, but I'm wondering if it would be better to just make > PTR_TO_BTF_ID to allow not just structs? E.g., if we have an int, just > allow reading those 4 bytes. > > Not sure what the implications are in terms of implementation, but > conceptually that shouldn't be a problem, given we do have BTF type ID > describing size and all. > Yeah. Totally agree. I looked at it initially. My take is PTR_TO_BTF_ID is meant for struct types. It required some code refactoring to break this assumption. I can add it to my TODO list and investigate it if this makes more sense. > > + const struct btf_type *t; > > + > > + mark_reg_known_zero(env, regs, BPF_REG_0); > > + t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL); > > + if (!btf_type_is_struct(t)) { > > + u32 tsize; > > + const struct btf_type *ret; > > + const char *tname; > > + > > + /* resolve the type size of ksym. */ > > + ret = btf_resolve_size(btf_vmlinux, t, &tsize); > > + if (IS_ERR(ret)) { > > + tname = btf_name_by_offset(btf_vmlinux, t->name_off); > > + verbose(env, "unable to resolve the size of type '%s': %ld\n", > > + tname, PTR_ERR(ret)); > > + return -EINVAL; > > + } > > + regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; > > + regs[BPF_REG_0].mem_size = tsize; > > + } else { > > + regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; > > + regs[BPF_REG_0].btf_id = meta.ret_btf_id; > > + } > > } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { > > int ret_btf_id; > > > > [...] > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > index b2a5380eb187..d474c1530f87 100644 > > --- a/kernel/trace/bpf_trace.c > > +++ b/kernel/trace/bpf_trace.c > > @@ -1144,6 +1144,22 @@ static const struct bpf_func_proto bpf_d_path_proto = { > > .allowed = bpf_d_path_allowed, > > }; > > > > +BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) > > +{ > > + if (cpu >= nr_cpu_ids) > > + return 0; > > + > > + return (u64)per_cpu_ptr(ptr, cpu); > > not sure, but on 32-bit arches this might cause compilation warning, > case to (unsigned long) instead? > Ah, I see, good catch! Will fix, thanks. > > +} > > + > > +static const struct bpf_func_proto bpf_per_cpu_ptr_proto = { > > + .func = bpf_per_cpu_ptr, > > + .gpl_only = false, > > + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, > > + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, > > + .arg2_type = ARG_ANYTHING, > > +}; > > + > > [...]
On Sun, Sep 13, 2020 at 10:01 PM Hao Luo <haoluo@google.com> wrote: > > Thanks for review, Andrii. > > One question, should I add bpf_{per, this}_cpu_ptr() to the > bpf_base_func_proto() in kernel/bpf/helpers.c? Yes, probably, but given it allows poking at kernel memory, it probably needs to be guarded by perfmon_capable() check, similar to bpf_get_current_task_proto. > > On Fri, Sep 4, 2020 at 1:04 PM Andrii Nakryiko > <andrii.nakryiko@gmail.com> wrote: > > > > On Thu, Sep 3, 2020 at 3:35 PM Hao Luo <haoluo@google.com> wrote: > > > > > > Add bpf_per_cpu_ptr() to help bpf programs access percpu vars. > > > bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel > > > except that it may return NULL. This happens when the cpu parameter is > > > out of range. So the caller must check the returned value. > > > > > > Acked-by: Andrii Nakryiko <andriin@fb.com> > > > Signed-off-by: Hao Luo <haoluo@google.com> > > > --- > > > include/linux/bpf.h | 3 ++ > > > include/linux/btf.h | 11 ++++++ > > > include/uapi/linux/bpf.h | 17 +++++++++ > > > kernel/bpf/btf.c | 10 ------ > > > kernel/bpf/verifier.c | 66 +++++++++++++++++++++++++++++++--- > > > kernel/trace/bpf_trace.c | 18 ++++++++++ > > > tools/include/uapi/linux/bpf.h | 17 +++++++++ > > > 7 files changed, 128 insertions(+), 14 deletions(-) > > > [...] > > > @@ -5002,6 +5016,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn > > > regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; > > > regs[BPF_REG_0].id = ++env->id_gen; > > > regs[BPF_REG_0].mem_size = meta.mem_size; > > > + } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) { > > > > Given this is internal implementation detail, this return type is > > fine, but I'm wondering if it would be better to just make > > PTR_TO_BTF_ID to allow not just structs? E.g., if we have an int, just > > allow reading those 4 bytes. > > > > Not sure what the implications are in terms of implementation, but > > conceptually that shouldn't be a problem, given we do have BTF type ID > > describing size and all. > > > > Yeah. Totally agree. I looked at it initially. My take is > PTR_TO_BTF_ID is meant for struct types. It required some code > refactoring to break this assumption. I can add it to my TODO list and > investigate it if this makes more sense. PTR_TO_BTF_ID was *implemented* for struct, but at least naming-wise nothing suggests it has to be restricted to structs. But yeah, this should be a separate change, don't block your patches on that. > > > > + const struct btf_type *t; > > > + [...]
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c6d9f2c444f4..6b2034f7665e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -292,6 +292,7 @@ enum bpf_arg_type { ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ }; /* type of values returned from helper functions */ @@ -305,6 +306,7 @@ enum bpf_return_type { RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ + RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -385,6 +387,7 @@ enum bpf_reg_type { PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ + PTR_TO_PERCPU_BTF_ID, /* reg points to percpu kernel type */ }; /* The information passed from prog-specific *_is_valid_access diff --git a/include/linux/btf.h b/include/linux/btf.h index 592373d359b9..07b7de1c05b0 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -71,6 +71,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, i < btf_type_vlen(struct_type); \ i++, member++) +#define for_each_vsi(i, struct_type, member) \ + for (i = 0, member = btf_type_var_secinfo(struct_type); \ + i < btf_type_vlen(struct_type); \ + i++, member++) + static inline bool btf_type_is_ptr(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; @@ -155,6 +160,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t) return (const struct btf_member *)(t + 1); } +static inline const struct btf_var_secinfo *btf_type_var_secinfo( + const struct btf_type *t) +{ + return (const struct btf_var_secinfo *)(t + 1); +} + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ab00ad9b32e5..d0ec94d5bdbf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3596,6 +3596,22 @@ union bpf_attr { * the data in *dst*. This is a wrapper of copy_from_user(). * Return * 0 on success, or a negative error in case of failure. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A generic pointer pointing to the kernel percpu variable on *cpu*. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3747,6 +3763,7 @@ union bpf_attr { FN(inode_storage_delete), \ FN(d_path), \ FN(copy_from_user), \ + FN(bpf_per_cpu_ptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 5831d9f3f3c5..4aa22e31774c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -188,11 +188,6 @@ i < btf_type_vlen(struct_type); \ i++, member++) -#define for_each_vsi(i, struct_type, member) \ - for (i = 0, member = btf_type_var_secinfo(struct_type); \ - i < btf_type_vlen(struct_type); \ - i++, member++) - #define for_each_vsi_from(i, from, struct_type, member) \ for (i = from, member = btf_type_var_secinfo(struct_type) + from; \ i < btf_type_vlen(struct_type); \ @@ -513,11 +508,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t) return (const struct btf_var *)(t + 1); } -static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t) -{ - return (const struct btf_var_secinfo *)(t + 1); -} - static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) { return kind_ops[BTF_INFO_KIND(t->info)]; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3b382c080cfd..a702600ff581 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -239,6 +239,7 @@ struct bpf_call_arg_meta { int ref_obj_id; int func_id; u32 btf_id; + u32 ret_btf_id; }; struct btf *btf_vmlinux; @@ -504,6 +505,7 @@ static const char * const reg_type_str[] = { [PTR_TO_XDP_SOCK] = "xdp_sock", [PTR_TO_BTF_ID] = "ptr_", [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", + [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", [PTR_TO_MEM] = "mem", [PTR_TO_MEM_OR_NULL] = "mem_or_null", [PTR_TO_RDONLY_BUF] = "rdonly_buf", @@ -570,7 +572,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); } else { - if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL) + if (t == PTR_TO_BTF_ID || + t == PTR_TO_BTF_ID_OR_NULL || + t == PTR_TO_PERCPU_BTF_ID) verbose(env, "%s", kernel_type_name(reg->btf_id)); verbose(env, "(id=%d", reg->id); if (reg_type_may_be_refcounted_or_null(t)) @@ -2184,6 +2188,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_RDONLY_BUF_OR_NULL: case PTR_TO_RDWR_BUF: case PTR_TO_RDWR_BUF_OR_NULL: + case PTR_TO_PERCPU_BTF_ID: return true; default: return false; @@ -4003,6 +4008,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, if (type != expected_type) goto err_type; } + } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) { + expected_type = PTR_TO_PERCPU_BTF_ID; + if (type != expected_type) + goto err_type; + if (!reg->btf_id) { + verbose(env, "Helper has invalid btf_id in R%d\n", regno); + return -EACCES; + } + meta->ret_btf_id = reg->btf_id; } else if (arg_type == ARG_PTR_TO_BTF_ID) { bool ids_match = false; @@ -5002,6 +5016,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].mem_size = meta.mem_size; + } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) { + const struct btf_type *t; + + mark_reg_known_zero(env, regs, BPF_REG_0); + t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL); + if (!btf_type_is_struct(t)) { + u32 tsize; + const struct btf_type *ret; + const char *tname; + + /* resolve the type size of ksym. */ + ret = btf_resolve_size(btf_vmlinux, t, &tsize); + if (IS_ERR(ret)) { + tname = btf_name_by_offset(btf_vmlinux, t->name_off); + verbose(env, "unable to resolve the size of type '%s': %ld\n", + tname, PTR_ERR(ret)); + return -EINVAL; + } + regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; + regs[BPF_REG_0].mem_size = tsize; + } else { + regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; + regs[BPF_REG_0].btf_id = meta.ret_btf_id; + } } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { int ret_btf_id; @@ -7407,7 +7445,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[insn->dst_reg].type = type; if (type == PTR_TO_MEM) { regs[insn->dst_reg].mem_size = meta; - } else if (type == PTR_TO_BTF_ID) { + } else if (type == PTR_TO_BTF_ID || + type == PTR_TO_PERCPU_BTF_ID) { regs[insn->dst_reg].btf_id = meta; } else { verbose(env, "bpf verifier is misconfigured\n"); @@ -9307,10 +9346,14 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn_aux_data *aux) { - u32 type, id = insn->imm; + u32 datasec_id, type, id = insn->imm; + const struct btf_var_secinfo *vsi; + const struct btf_type *datasec; const struct btf_type *t; const char *sym_name; + bool percpu = false; u64 addr; + int i; if (!btf_vmlinux) { verbose(env, "%s: btf not available. verifier misconfigured.\n", @@ -9343,12 +9386,27 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, return -ENOENT; } + datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu", + BTF_KIND_DATASEC); + if (datasec_id > 0) { + datasec = btf_type_by_id(btf_vmlinux, datasec_id); + for_each_vsi(i, datasec, vsi) { + if (vsi->type == id) { + percpu = true; + break; + } + } + } + insn[0].imm = (u32)addr; insn[1].imm = addr >> 32; type = t->type; t = btf_type_skip_modifiers(btf_vmlinux, type, NULL); - if (!btf_type_is_struct(t)) { + if (percpu) { + aux->pseudo_btf_id_type = PTR_TO_PERCPU_BTF_ID; + aux->pseudo_btf_id_meta = type; + } else if (!btf_type_is_struct(t)) { const struct btf_type *ret; const char *tname; u32 tsize; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b2a5380eb187..d474c1530f87 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1144,6 +1144,22 @@ static const struct bpf_func_proto bpf_d_path_proto = { .allowed = bpf_d_path_allowed, }; +BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) +{ + if (cpu >= nr_cpu_ids) + return 0; + + return (u64)per_cpu_ptr(ptr, cpu); +} + +static const struct bpf_func_proto bpf_per_cpu_ptr_proto = { + .func = bpf_per_cpu_ptr, + .gpl_only = false, + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, + .arg2_type = ARG_ANYTHING, +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1230,6 +1246,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_task_stack_proto; case BPF_FUNC_copy_from_user: return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; + case BPF_FUNC_bpf_per_cpu_ptr: + return &bpf_per_cpu_ptr_proto; default: return NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ab00ad9b32e5..d0ec94d5bdbf 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3596,6 +3596,22 @@ union bpf_attr { * the data in *dst*. This is a wrapper of copy_from_user(). * Return * 0 on success, or a negative error in case of failure. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A generic pointer pointing to the kernel percpu variable on *cpu*. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3747,6 +3763,7 @@ union bpf_attr { FN(inode_storage_delete), \ FN(d_path), \ FN(copy_from_user), \ + FN(bpf_per_cpu_ptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper