diff mbox series

[bpf-next,v1,6/8] bpf: Introduce bpf_per_cpu_ptr()

Message ID 20200819224030.1615203-7-haoluo@google.com
State New
Headers show
Series bpf: BTF support for ksyms | expand

Commit Message

Hao Luo Aug. 19, 2020, 10:40 p.m. UTC
Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel
except that it may return NULL. This happens when the cpu parameter is
out of range. So the caller must check the returned value.

Signed-off-by: Hao Luo <haoluo@google.com>
---
 include/linux/bpf.h      |  3 ++
 include/linux/btf.h      | 11 +++++++
 include/uapi/linux/bpf.h | 14 +++++++++
 kernel/bpf/btf.c         | 10 -------
 kernel/bpf/verifier.c    | 64 ++++++++++++++++++++++++++++++++++++++--
 kernel/trace/bpf_trace.c | 18 +++++++++++
 6 files changed, 107 insertions(+), 13 deletions(-)

Comments

Andrii Nakryiko Aug. 22, 2020, 3:26 a.m. UTC | #1
On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
>
> Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
> bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel
> except that it may return NULL. This happens when the cpu parameter is
> out of range. So the caller must check the returned value.
>
> Signed-off-by: Hao Luo <haoluo@google.com>
> ---

The logic looks correct, few naming nits, but otherwise:

Acked-by: Andrii Nakryiko <andriin@fb.com>

>  include/linux/bpf.h      |  3 ++
>  include/linux/btf.h      | 11 +++++++
>  include/uapi/linux/bpf.h | 14 +++++++++
>  kernel/bpf/btf.c         | 10 -------
>  kernel/bpf/verifier.c    | 64 ++++++++++++++++++++++++++++++++++++++--
>  kernel/trace/bpf_trace.c | 18 +++++++++++
>  6 files changed, 107 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 55f694b63164..613404beab33 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -268,6 +268,7 @@ enum bpf_arg_type {
>         ARG_PTR_TO_ALLOC_MEM,   /* pointer to dynamically allocated memory */
>         ARG_PTR_TO_ALLOC_MEM_OR_NULL,   /* pointer to dynamically allocated memory or NULL */
>         ARG_CONST_ALLOC_SIZE_OR_ZERO,   /* number of allocated bytes requested */
> +       ARG_PTR_TO_PERCPU_BTF_ID,       /* pointer to in-kernel percpu type */
>  };
>
>  /* type of values returned from helper functions */
> @@ -281,6 +282,7 @@ enum bpf_return_type {
>         RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
>         RET_PTR_TO_ALLOC_MEM_OR_NULL,   /* returns a pointer to dynamically allocated memory or NULL */
>         RET_PTR_TO_BTF_ID_OR_NULL,      /* returns a pointer to a btf_id or NULL */
> +       RET_PTR_TO_MEM_OR_BTF_OR_NULL,  /* returns a pointer to a valid memory or a btf_id or NULL */

I know it's already very long, but still let's use BTF_ID consistently

>  };
>
>  /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
> @@ -360,6 +362,7 @@ enum bpf_reg_type {
>         PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
>         PTR_TO_RDWR_BUF,         /* reg points to a read/write buffer */
>         PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
> +       PTR_TO_PERCPU_BTF_ID,    /* reg points to percpu kernel type */
>  };
>

[...]

> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 468376f2910b..c7e49a102ed2 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3415,6 +3415,19 @@ union bpf_attr {
>   *             A non-negative value equal to or less than *size* on success,
>   *             or a negative error in case of failure.
>   *
> + * void *bpf_per_cpu_ptr(const void *ptr, u32 cpu)
> + *     Description
> + *             Take the address of a percpu ksym and return a pointer pointing
> + *             to the variable on *cpu*. A ksym is an extern variable decorated
> + *             with '__ksym'. A ksym is percpu if there is a global percpu var
> + *             (either static or global) defined of the same name in the kernel.

The function signature has "ptr", not "ksym", but the description is
using "ksym". please make them consistent (might name param
"percpu_ptr")

> + *
> + *             bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
> + *             kernel, except that bpf_per_cpu_ptr() may return NULL. This
> + *             happens if *cpu* is larger than nr_cpu_ids. The caller of
> + *             bpf_per_cpu_ptr() must check the returned value.
> + *     Return
> + *             A generic pointer pointing to the variable on *cpu*.
>   */

[...]

> +       } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
> +               expected_type = PTR_TO_PERCPU_BTF_ID;
> +               if (type != expected_type)
> +                       goto err_type;
> +               if (!reg->btf_id) {
> +                       verbose(env, "Helper has zero btf_id in R%d\n", regno);

nit: "invalid btf_id"?

> +                       return -EACCES;
> +               }
> +               meta->ret_btf_id = reg->btf_id;
>         } else if (arg_type == ARG_PTR_TO_BTF_ID) {
>                 expected_type = PTR_TO_BTF_ID;
>                 if (type != expected_type)
> @@ -4904,6 +4918,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>                 regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
>                 regs[BPF_REG_0].id = ++env->id_gen;
>                 regs[BPF_REG_0].mem_size = meta.mem_size;

[...]
Andrii Nakryiko Aug. 22, 2020, 3:31 a.m. UTC | #2
On Fri, Aug 21, 2020 at 8:26 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
> >
> > Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
> > bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel
> > except that it may return NULL. This happens when the cpu parameter is
> > out of range. So the caller must check the returned value.
> >
> > Signed-off-by: Hao Luo <haoluo@google.com>
> > ---
>
> The logic looks correct, few naming nits, but otherwise:
>
> Acked-by: Andrii Nakryiko <andriin@fb.com>
>
> >  include/linux/bpf.h      |  3 ++
> >  include/linux/btf.h      | 11 +++++++
> >  include/uapi/linux/bpf.h | 14 +++++++++
> >  kernel/bpf/btf.c         | 10 -------
> >  kernel/bpf/verifier.c    | 64 ++++++++++++++++++++++++++++++++++++++--
> >  kernel/trace/bpf_trace.c | 18 +++++++++++
> >  6 files changed, 107 insertions(+), 13 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 55f694b63164..613404beab33 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -268,6 +268,7 @@ enum bpf_arg_type {
> >         ARG_PTR_TO_ALLOC_MEM,   /* pointer to dynamically allocated memory */
> >         ARG_PTR_TO_ALLOC_MEM_OR_NULL,   /* pointer to dynamically allocated memory or NULL */
> >         ARG_CONST_ALLOC_SIZE_OR_ZERO,   /* number of allocated bytes requested */
> > +       ARG_PTR_TO_PERCPU_BTF_ID,       /* pointer to in-kernel percpu type */
> >  };
> >
> >  /* type of values returned from helper functions */
> > @@ -281,6 +282,7 @@ enum bpf_return_type {
> >         RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
> >         RET_PTR_TO_ALLOC_MEM_OR_NULL,   /* returns a pointer to dynamically allocated memory or NULL */
> >         RET_PTR_TO_BTF_ID_OR_NULL,      /* returns a pointer to a btf_id or NULL */
> > +       RET_PTR_TO_MEM_OR_BTF_OR_NULL,  /* returns a pointer to a valid memory or a btf_id or NULL */
>
> I know it's already very long, but still let's use BTF_ID consistently
>
> >  };
> >
> >  /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
> > @@ -360,6 +362,7 @@ enum bpf_reg_type {
> >         PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
> >         PTR_TO_RDWR_BUF,         /* reg points to a read/write buffer */
> >         PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
> > +       PTR_TO_PERCPU_BTF_ID,    /* reg points to percpu kernel type */
> >  };
> >
>
> [...]
>
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 468376f2910b..c7e49a102ed2 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -3415,6 +3415,19 @@ union bpf_attr {
> >   *             A non-negative value equal to or less than *size* on success,
> >   *             or a negative error in case of failure.
> >   *
> > + * void *bpf_per_cpu_ptr(const void *ptr, u32 cpu)

btw, having bpf_this_cpu_ptr(const void *ptr) seems worthwhile as well, WDYT?

> > + *     Description
> > + *             Take the address of a percpu ksym and return a pointer pointing
> > + *             to the variable on *cpu*. A ksym is an extern variable decorated
> > + *             with '__ksym'. A ksym is percpu if there is a global percpu var
> > + *             (either static or global) defined of the same name in the kernel.
>
> The function signature has "ptr", not "ksym", but the description is
> using "ksym". please make them consistent (might name param
> "percpu_ptr")
>
> > + *
> > + *             bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
> > + *             kernel, except that bpf_per_cpu_ptr() may return NULL. This
> > + *             happens if *cpu* is larger than nr_cpu_ids. The caller of
> > + *             bpf_per_cpu_ptr() must check the returned value.
> > + *     Return
> > + *             A generic pointer pointing to the variable on *cpu*.
> >   */
>
> [...]
>
> > +       } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
> > +               expected_type = PTR_TO_PERCPU_BTF_ID;
> > +               if (type != expected_type)
> > +                       goto err_type;
> > +               if (!reg->btf_id) {
> > +                       verbose(env, "Helper has zero btf_id in R%d\n", regno);
>
> nit: "invalid btf_id"?
>
> > +                       return -EACCES;
> > +               }
> > +               meta->ret_btf_id = reg->btf_id;
> >         } else if (arg_type == ARG_PTR_TO_BTF_ID) {
> >                 expected_type = PTR_TO_BTF_ID;
> >                 if (type != expected_type)
> > @@ -4904,6 +4918,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
> >                 regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
> >                 regs[BPF_REG_0].id = ++env->id_gen;
> >                 regs[BPF_REG_0].mem_size = meta.mem_size;
>
> [...]
Hao Luo Aug. 22, 2020, 7:49 a.m. UTC | #3
On Fri, Aug 21, 2020 at 8:31 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Fri, Aug 21, 2020 at 8:26 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
> > >
> > > Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
> > > bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel
> > > except that it may return NULL. This happens when the cpu parameter is
> > > out of range. So the caller must check the returned value.
> > >
> > > Signed-off-by: Hao Luo <haoluo@google.com>
> > > ---
> >
> > The logic looks correct, few naming nits, but otherwise:
> >
> > Acked-by: Andrii Nakryiko <andriin@fb.com>
> >
> > >  include/linux/bpf.h      |  3 ++
> > >  include/linux/btf.h      | 11 +++++++
> > >  include/uapi/linux/bpf.h | 14 +++++++++
> > >  kernel/bpf/btf.c         | 10 -------
> > >  kernel/bpf/verifier.c    | 64 ++++++++++++++++++++++++++++++++++++++--
> > >  kernel/trace/bpf_trace.c | 18 +++++++++++
> > >  6 files changed, 107 insertions(+), 13 deletions(-)
[...]
>
> btw, having bpf_this_cpu_ptr(const void *ptr) seems worthwhile as well, WDYT?
>

It's probably not a good idea, IMHO. How does it interact with
preemption? Should we treat it as __this_cpu_ptr()? If so, I feel it's
easy to be misused, if the bpf program is called in a preemptible
context.

Btw, is bpf programs always called with preemption disabled? How about
interrupts? I haven't thought about these questions before but I think
they matter as we start to have more ways for bpf programs to interact
with the kernel.

Best,
Hao
Andrii Nakryiko Aug. 22, 2020, 7:55 a.m. UTC | #4
On Sat, Aug 22, 2020 at 12:49 AM Hao Luo <haoluo@google.com> wrote:
>
> On Fri, Aug 21, 2020 at 8:31 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Fri, Aug 21, 2020 at 8:26 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
> > > >
> > > > Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
> > > > bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel
> > > > except that it may return NULL. This happens when the cpu parameter is
> > > > out of range. So the caller must check the returned value.
> > > >
> > > > Signed-off-by: Hao Luo <haoluo@google.com>
> > > > ---
> > >
> > > The logic looks correct, few naming nits, but otherwise:
> > >
> > > Acked-by: Andrii Nakryiko <andriin@fb.com>
> > >
> > > >  include/linux/bpf.h      |  3 ++
> > > >  include/linux/btf.h      | 11 +++++++
> > > >  include/uapi/linux/bpf.h | 14 +++++++++
> > > >  kernel/bpf/btf.c         | 10 -------
> > > >  kernel/bpf/verifier.c    | 64 ++++++++++++++++++++++++++++++++++++++--
> > > >  kernel/trace/bpf_trace.c | 18 +++++++++++
> > > >  6 files changed, 107 insertions(+), 13 deletions(-)
> [...]
> >
> > btw, having bpf_this_cpu_ptr(const void *ptr) seems worthwhile as well, WDYT?
> >
>
> It's probably not a good idea, IMHO. How does it interact with
> preemption? Should we treat it as __this_cpu_ptr()? If so, I feel it's
> easy to be misused, if the bpf program is called in a preemptible
> context.
>
> Btw, is bpf programs always called with preemption disabled? How about
> interrupts? I haven't thought about these questions before but I think
> they matter as we start to have more ways for bpf programs to interact
> with the kernel.

non-sleepable BPF is always disabling CPU migration, so there is no
problem with this_cpu_ptr. For sleepable not sure, but we can disable
this helper for sleepable BPF programs, if that's a problem.

>
> Best,
> Hao
Hao Luo Aug. 25, 2020, 1:03 a.m. UTC | #5
On Sat, Aug 22, 2020 at 12:55 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Sat, Aug 22, 2020 at 12:49 AM Hao Luo <haoluo@google.com> wrote:
> >
> > On Fri, Aug 21, 2020 at 8:31 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Fri, Aug 21, 2020 at 8:26 PM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Wed, Aug 19, 2020 at 3:42 PM Hao Luo <haoluo@google.com> wrote:
> > > > >
> > > > > Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
> > > > > bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel
> > > > > except that it may return NULL. This happens when the cpu parameter is
> > > > > out of range. So the caller must check the returned value.
> > > > >
> > > > > Signed-off-by: Hao Luo <haoluo@google.com>
> > > > > ---
> > > >
> > > > The logic looks correct, few naming nits, but otherwise:
> > > >
> > > > Acked-by: Andrii Nakryiko <andriin@fb.com>
> > > >
> > > > >  include/linux/bpf.h      |  3 ++
> > > > >  include/linux/btf.h      | 11 +++++++
> > > > >  include/uapi/linux/bpf.h | 14 +++++++++
> > > > >  kernel/bpf/btf.c         | 10 -------
> > > > >  kernel/bpf/verifier.c    | 64 ++++++++++++++++++++++++++++++++++++++--
> > > > >  kernel/trace/bpf_trace.c | 18 +++++++++++
> > > > >  6 files changed, 107 insertions(+), 13 deletions(-)
> > [...]
> > >
> > > btw, having bpf_this_cpu_ptr(const void *ptr) seems worthwhile as well, WDYT?
> > >
> >
> > It's probably not a good idea, IMHO. How does it interact with
> > preemption? Should we treat it as __this_cpu_ptr()? If so, I feel it's
> > easy to be misused, if the bpf program is called in a preemptible
> > context.
> >
> > Btw, is bpf programs always called with preemption disabled? How about
> > interrupts? I haven't thought about these questions before but I think
> > they matter as we start to have more ways for bpf programs to interact
> > with the kernel.
>
> non-sleepable BPF is always disabling CPU migration, so there is no
> problem with this_cpu_ptr. For sleepable not sure, but we can disable
> this helper for sleepable BPF programs, if that's a problem.
>

Sounds good. I see there is bpf_get_smp_processor_id() and we are
already doing this. I can add this_cpu_ptr() in v2.

Hao
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 55f694b63164..613404beab33 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -268,6 +268,7 @@  enum bpf_arg_type {
 	ARG_PTR_TO_ALLOC_MEM,	/* pointer to dynamically allocated memory */
 	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
+	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 };
 
 /* type of values returned from helper functions */
@@ -281,6 +282,7 @@  enum bpf_return_type {
 	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
 	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
+	RET_PTR_TO_MEM_OR_BTF_OR_NULL,	/* returns a pointer to a valid memory or a btf_id or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -360,6 +362,7 @@  enum bpf_reg_type {
 	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
 	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
 	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
+	PTR_TO_PERCPU_BTF_ID,	 /* reg points to percpu kernel type */
 };
 
 /* The information passed from prog-specific *_is_valid_access
diff --git a/include/linux/btf.h b/include/linux/btf.h
index cee4089e83c0..dc3509246913 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -72,6 +72,11 @@  btf_resolve_size(const struct btf *btf, const struct btf_type *type,
 	     i < btf_type_vlen(struct_type);			\
 	     i++, member++)
 
+#define for_each_vsi(i, struct_type, member)			\
+	for (i = 0, member = btf_type_var_secinfo(struct_type);	\
+	     i < btf_type_vlen(struct_type);			\
+	     i++, member++)
+
 static inline bool btf_type_is_ptr(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_PTR;
@@ -156,6 +161,12 @@  static inline const struct btf_member *btf_type_member(const struct btf_type *t)
 	return (const struct btf_member *)(t + 1);
 }
 
+static inline const struct btf_var_secinfo *btf_type_var_secinfo(
+		const struct btf_type *t)
+{
+	return (const struct btf_var_secinfo *)(t + 1);
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 468376f2910b..c7e49a102ed2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3415,6 +3415,19 @@  union bpf_attr {
  *		A non-negative value equal to or less than *size* on success,
  *		or a negative error in case of failure.
  *
+ * void *bpf_per_cpu_ptr(const void *ptr, u32 cpu)
+ *	Description
+ *		Take the address of a percpu ksym and return a pointer pointing
+ *		to the variable on *cpu*. A ksym is an extern variable decorated
+ *		with '__ksym'. A ksym is percpu if there is a global percpu var
+ *		(either static or global) defined of the same name in the kernel.
+ *
+ *		bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+ *		kernel, except that bpf_per_cpu_ptr() may return NULL. This
+ *		happens if *cpu* is larger than nr_cpu_ids. The caller of
+ *		bpf_per_cpu_ptr() must check the returned value.
+ *	Return
+ *		A generic pointer pointing to the variable on *cpu*.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3559,6 +3572,7 @@  union bpf_attr {
 	FN(skc_to_tcp_request_sock),	\
 	FN(skc_to_udp6_sock),		\
 	FN(get_task_stack),		\
+	FN(bpf_per_cpu_ptr),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index b6d8f653afe2..e735804f5f34 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -186,11 +186,6 @@ 
 	     i < btf_type_vlen(struct_type);				\
 	     i++, member++)
 
-#define for_each_vsi(i, struct_type, member)			\
-	for (i = 0, member = btf_type_var_secinfo(struct_type);	\
-	     i < btf_type_vlen(struct_type);			\
-	     i++, member++)
-
 #define for_each_vsi_from(i, from, struct_type, member)				\
 	for (i = from, member = btf_type_var_secinfo(struct_type) + from;	\
 	     i < btf_type_vlen(struct_type);					\
@@ -511,11 +506,6 @@  static const struct btf_var *btf_type_var(const struct btf_type *t)
 	return (const struct btf_var *)(t + 1);
 }
 
-static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
-{
-	return (const struct btf_var_secinfo *)(t + 1);
-}
-
 static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
 {
 	return kind_ops[BTF_INFO_KIND(t->info)];
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 47badde71f83..c2db6308d6fa 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -238,6 +238,7 @@  struct bpf_call_arg_meta {
 	int ref_obj_id;
 	int func_id;
 	u32 btf_id;
+	u32 ret_btf_id;
 };
 
 struct btf *btf_vmlinux;
@@ -503,6 +504,7 @@  static const char * const reg_type_str[] = {
 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
 	[PTR_TO_BTF_ID]		= "ptr_",
 	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
+	[PTR_TO_PERCPU_BTF_ID]	= "percpu_ptr_",
 	[PTR_TO_MEM]		= "mem",
 	[PTR_TO_MEM_OR_NULL]	= "mem_or_null",
 	[PTR_TO_RDONLY_BUF]	= "rdonly_buf",
@@ -569,7 +571,9 @@  static void print_verifier_state(struct bpf_verifier_env *env,
 			/* reg->off should be 0 for SCALAR_VALUE */
 			verbose(env, "%lld", reg->var_off.value + reg->off);
 		} else {
-			if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
+			if (t == PTR_TO_BTF_ID ||
+			    t == PTR_TO_BTF_ID_OR_NULL ||
+			    t == PTR_TO_PERCPU_BTF_ID)
 				verbose(env, "%s", kernel_type_name(reg->btf_id));
 			verbose(env, "(id=%d", reg->id);
 			if (reg_type_may_be_refcounted_or_null(t))
@@ -2183,6 +2187,7 @@  static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_RDONLY_BUF_OR_NULL:
 	case PTR_TO_RDWR_BUF:
 	case PTR_TO_RDWR_BUF_OR_NULL:
+	case PTR_TO_PERCPU_BTF_ID:
 		return true;
 	default:
 		return false;
@@ -3959,6 +3964,15 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 			if (type != expected_type)
 				goto err_type;
 		}
+	} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
+		expected_type = PTR_TO_PERCPU_BTF_ID;
+		if (type != expected_type)
+			goto err_type;
+		if (!reg->btf_id) {
+			verbose(env, "Helper has zero btf_id in R%d\n", regno);
+			return -EACCES;
+		}
+		meta->ret_btf_id = reg->btf_id;
 	} else if (arg_type == ARG_PTR_TO_BTF_ID) {
 		expected_type = PTR_TO_BTF_ID;
 		if (type != expected_type)
@@ -4904,6 +4918,30 @@  static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
 		regs[BPF_REG_0].id = ++env->id_gen;
 		regs[BPF_REG_0].mem_size = meta.mem_size;
+	} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_OR_NULL) {
+		const struct btf_type *t;
+
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
+		if (!btf_type_is_struct(t)) {
+			u32 tsize;
+			const struct btf_type *ret;
+			const char *tname;
+
+			/* resolve the type size of ksym. */
+			ret = btf_resolve_size(btf_vmlinux, t, &tsize, NULL, NULL);
+			if (IS_ERR(ret)) {
+				tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+				verbose(env, "unable to resolve the size of type '%s': %ld\n",
+					tname, PTR_ERR(ret));
+				return -EINVAL;
+			}
+			regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
+			regs[BPF_REG_0].mem_size = tsize;
+		} else {
+			regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
+			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
+		}
 	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
 		int ret_btf_id;
 
@@ -7210,10 +7248,15 @@  static inline int check_pseudo_btf_id(struct bpf_verifier_env *env,
 				      struct bpf_insn *insn)
 {
 	struct bpf_reg_state *regs = cur_regs(env);
-	u32 type, id = insn->imm;
+	u32 datasec_id, type, id = insn->imm;
 	u64 addr;
 	const char *sym_name;
 	const struct btf_type *t = btf_type_by_id(btf_vmlinux, id);
+	const struct btf_type *datasec;
+	const struct btf_var_secinfo *vsi;
+	int i;
+
+	bool percpu = false;
 
 	if (!t) {
 		verbose(env, "%s: invalid btf_id %d\n", __func__, id);
@@ -7243,9 +7286,24 @@  static inline int check_pseudo_btf_id(struct bpf_verifier_env *env,
 	insn[1].imm = addr >> 32;
 	mark_reg_known_zero(env, regs, insn->dst_reg);
 
+	datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
+					   BTF_KIND_DATASEC);
+	if (datasec_id > 0) {
+		datasec = btf_type_by_id(btf_vmlinux, datasec_id);
+		for_each_vsi(i, datasec, vsi) {
+			if (vsi->type == id) {
+				percpu = true;
+				break;
+			}
+		}
+	}
+
 	type = t->type;
 	t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
-	if (!btf_type_is_struct(t)) {
+	if (percpu) {
+		regs[insn->dst_reg].type = PTR_TO_PERCPU_BTF_ID;
+		regs[insn->dst_reg].btf_id = type;
+	} else if (!btf_type_is_struct(t)) {
 		u32 tsize;
 		const struct btf_type *ret;
 		const char *tname;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a8d4f253ed77..7f0033960d82 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1098,6 +1098,22 @@  static const struct bpf_func_proto bpf_send_signal_thread_proto = {
 	.arg1_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
+{
+	if (cpu >= nr_cpu_ids)
+		return 0;
+
+	return (u64)per_cpu_ptr(ptr, cpu);
+}
+
+static const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
+	.func		= bpf_per_cpu_ptr,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MEM_OR_BTF_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1182,6 +1198,8 @@  bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_jiffies64_proto;
 	case BPF_FUNC_get_task_stack:
 		return &bpf_get_task_stack_proto;
+	case BPF_FUNC_bpf_per_cpu_ptr:
+		return &bpf_per_cpu_ptr_proto;
 	default:
 		return NULL;
 	}