diff mbox series

[bpf-next] bpf: introduce helper bpf_raw_read_cpu_clock

Message ID 20211006175106.GA295227@fuller.cnet (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series [bpf-next] bpf: introduce helper bpf_raw_read_cpu_clock | expand

Checks

Context Check Description
bpf/vmtest-bpf-next fail VM_Test
bpf/vmtest-bpf-next-PR fail PR summary
netdev/cover_letter success Single patches do not need cover letters
netdev/fixes_present success Fixes tag not required for -next series
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 21 maintainers not CCed: sean@mess.org revest@chromium.org netdev@vger.kernel.org daniel@iogearbox.net kpsingh@kernel.org ast@kernel.org toke@redhat.com x86@kernel.org john.fastabend@gmail.com hpa@zytor.com joe@cilium.io brouer@redhat.com mingo@redhat.com bp@alien8.de jackmanb@google.com kafai@fb.com yhs@fb.com songliubraving@fb.com mchehab@kernel.org rostedt@goodmis.org linux-media@vger.kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 11790 this patch: 11790
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success No Fixes tag
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: please, no space before tabs
netdev/build_allmodconfig_warn success Errors and warnings before: 11421 this patch: 11421
netdev/header_inline success No static functions without inline keyword in header files

Commit Message

Marcelo Tosatti Oct. 6, 2021, 5:51 p.m. UTC
Add bpf_raw_read_cpu_clock helper, to read architecture specific 
CPU clock. In x86's case, this is the TSC.

This is necessary to synchronize bpf traces from host and guest bpf-programs
(after subtracting guest tsc-offset from guest timestamps).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Comments

Song Liu Oct. 6, 2021, 9:37 p.m. UTC | #1
On Wed, Oct 6, 2021 at 10:52 AM Marcelo Tosatti <mtosatti@redhat.com> wrote:
>
>
>
> Add bpf_raw_read_cpu_clock helper, to read architecture specific
> CPU clock. In x86's case, this is the TSC.
>
> This is necessary to synchronize bpf traces from host and guest bpf-programs
> (after subtracting guest tsc-offset from guest timestamps).

Trying to understand the use case. So in a host-guest scenario,
bpf_ktime_get_ns()
will return different values in host and guest, but rdtsc() will give
the same value.
Is this correct?

>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index ab83c22d274e..832bb1f65f28 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -95,6 +95,7 @@ config X86
>         select ARCH_HAS_UBSAN_SANITIZE_ALL
>         select ARCH_HAS_DEBUG_WX
>         select ARCH_HAS_ZONE_DMA_SET if EXPERT
> +       select ARCH_HAS_BPF_RAW_CPU_CLOCK
>         select ARCH_HAVE_NMI_SAFE_CMPXCHG
>         select ARCH_MIGHT_HAVE_ACPI_PDC         if ACPI
>         select ARCH_MIGHT_HAVE_PC_PARPORT
> diff --git a/arch/x86/include/asm/bpf_raw_cpu_clock.h b/arch/x86/include/asm/bpf_raw_cpu_clock.h
> new file mode 100644
> index 000000000000..6951c399819e
> --- /dev/null
> +++ b/arch/x86/include/asm/bpf_raw_cpu_clock.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_X86_BPF_RAW_CPU_CLOCK_H_
> +#define _ASM_X86_BPF_RAW_CPU_CLOCK_H_
> +
> +static inline unsigned long long read_raw_cpu_clock(void)
> +{
> +       return rdtsc_ordered();
> +}
> +
> +#endif /* _ASM_X86_BPF_RAW_CPU_CLOCK_H_ */
> diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
> index 3eff08d7b8e5..844a44ff508d 100644
> --- a/drivers/media/rc/bpf-lirc.c
> +++ b/drivers/media/rc/bpf-lirc.c
> @@ -105,6 +105,8 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_ktime_get_ns_proto;
>         case BPF_FUNC_ktime_get_boot_ns:
>                 return &bpf_ktime_get_boot_ns_proto;
> +       case BPF_FUNC_read_raw_cpu_clock:
> +               return &bpf_read_raw_cpu_clock_proto;
>         case BPF_FUNC_tail_call:
>                 return &bpf_tail_call_proto;
>         case BPF_FUNC_get_prandom_u32:
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index d604c8251d88..b6cb426085fb 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2058,6 +2058,7 @@ extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
>  extern const struct bpf_func_proto bpf_tail_call_proto;
>  extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
>  extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto;
> +extern const struct bpf_func_proto bpf_read_raw_cpu_clock_proto;
>  extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
>  extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
>  extern const struct bpf_func_proto bpf_get_current_comm_proto;
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6fc59d61937a..52191791b089 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4037,6 +4037,13 @@ union bpf_attr {
>   *     Return
>   *             Current *ktime*.
>   *
> + * u64 bpf_read_raw_cpu_clock(void)
> + *     Description
> + *             Return the architecture specific CPU clock value.
> + *             For x86, this is the TSC clock.
> + *     Return
> + *             *CPU clock value*
> + *
>   * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
>   *     Description
>   *             **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
> @@ -5089,6 +5096,7 @@ union bpf_attr {
>         FN(task_pt_regs),               \
>         FN(get_branch_snapshot),        \
>         FN(trace_vprintk),              \
> +       FN(read_raw_cpu_clock),         \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
> index a82d6de86522..5815db157220 100644
> --- a/kernel/bpf/Kconfig
> +++ b/kernel/bpf/Kconfig
> @@ -21,6 +21,10 @@ config HAVE_EBPF_JIT
>  config ARCH_WANT_DEFAULT_BPF_JIT
>         bool
>
> +# Used by archs to tell they support reading raw CPU clock
> +config ARCH_HAS_BPF_RAW_CPU_CLOCK
> +       bool
> +
>  menu "BPF subsystem"
>
>  config BPF_SYSCALL
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index b6c72af64d5d..8e2359dfd582 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2345,6 +2345,8 @@ const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
>  const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
>  const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
>  const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
> +const struct bpf_func_proto bpf_read_raw_cpu_clock_proto __weak;
> +
>
>  const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
>  const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 1ffd469c217f..90b9e5efaf65 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -18,6 +18,10 @@
>
>  #include "../../lib/kstrtox.h"
>
> +#ifdef CONFIG_ARCH_HAS_BPF_RAW_CPU_CLOCK
> +#include <asm/bpf_raw_cpu_clock.h>
> +#endif
> +
>  /* If kernel subsystem is allowing eBPF programs to call this function,
>   * inside its own verifier_ops->get_func_proto() callback it should return
>   * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
> @@ -168,6 +172,21 @@ const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
>         .ret_type       = RET_INTEGER,
>  };
>
> +BPF_CALL_0(bpf_read_raw_cpu_clock)
> +{
> +#ifdef CONFIG_ARCH_HAS_BPF_RAW_CPU_CLOCK
> +       return read_raw_cpu_clock();
> +#else
> +       return sched_clock();
> +#endif
> +}
> +
> +const struct bpf_func_proto bpf_read_raw_cpu_clock_proto = {
> +       .func           = bpf_read_raw_cpu_clock,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +};
> +
>  BPF_CALL_0(bpf_ktime_get_coarse_ns)
>  {
>         return ktime_get_coarse_ns();
> @@ -1366,6 +1385,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
>                 return &bpf_ktime_get_boot_ns_proto;
>         case BPF_FUNC_ktime_get_coarse_ns:
>                 return &bpf_ktime_get_coarse_ns_proto;
> +       case BPF_FUNC_read_raw_cpu_clock:
> +               return &bpf_read_raw_cpu_clock_proto;
>         case BPF_FUNC_ringbuf_output:
>                 return &bpf_ringbuf_output_proto;
>         case BPF_FUNC_ringbuf_reserve:
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 6b3153841a33..047ca7c1d57a 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1113,6 +1113,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_ktime_get_boot_ns_proto;
>         case BPF_FUNC_ktime_get_coarse_ns:
>                 return &bpf_ktime_get_coarse_ns_proto;
> +       case BPF_FUNC_read_raw_cpu_clock:
> +               return &bpf_read_raw_cpu_clock_proto;

With the change in bpf_base_func_proto, this part is not needed.

>         case BPF_FUNC_tail_call:
>                 return &bpf_tail_call_proto;
>         case BPF_FUNC_get_current_pid_tgid:
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 6fc59d61937a..52191791b089 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -4037,6 +4037,13 @@ union bpf_attr {
>   *     Return
>   *             Current *ktime*.
>   *
> + * u64 bpf_read_raw_cpu_clock(void)
> + *     Description
> + *             Return the architecture specific CPU clock value.
> + *             For x86, this is the TSC clock.
> + *     Return
> + *             *CPU clock value*
> + *
>   * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
>   *     Description
>   *             **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
> @@ -5089,6 +5096,7 @@ union bpf_attr {
>         FN(task_pt_regs),               \
>         FN(get_branch_snapshot),        \
>         FN(trace_vprintk),              \
> +       FN(read_raw_cpu_clock),         \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>
Peter Zijlstra Oct. 7, 2021, 7:18 a.m. UTC | #2
On Wed, Oct 06, 2021 at 02:37:09PM -0700, Song Liu wrote:
> On Wed, Oct 6, 2021 at 10:52 AM Marcelo Tosatti <mtosatti@redhat.com> wrote:
> >
> >
> >
> > Add bpf_raw_read_cpu_clock helper, to read architecture specific
> > CPU clock. In x86's case, this is the TSC.
> >
> > This is necessary to synchronize bpf traces from host and guest bpf-programs
> > (after subtracting guest tsc-offset from guest timestamps).
> 
> Trying to understand the use case. So in a host-guest scenario,
> bpf_ktime_get_ns()
> will return different values in host and guest, but rdtsc() will give
> the same value.
> Is this correct?

No, it will not. Also, please explain if any of this stands a chance of
working for anything other than x86. Or even on x86 in the face of
guest migration.

Also, please explain, again, what's wrong with dumping snapshots of
CLOCK_MONOTONIC{,_RAW} from host and guest and correlating time that
way?

And also explain why BPF needs to do this differently than all the other
tracers.
Marcelo Tosatti Oct. 7, 2021, 9:10 a.m. UTC | #3
Hi Peter, Song,

On Thu, Oct 07, 2021 at 09:18:56AM +0200, Peter Zijlstra wrote:
> On Wed, Oct 06, 2021 at 02:37:09PM -0700, Song Liu wrote:
> > On Wed, Oct 6, 2021 at 10:52 AM Marcelo Tosatti <mtosatti@redhat.com> wrote:
> > >
> > >
> > >
> > > Add bpf_raw_read_cpu_clock helper, to read architecture specific
> > > CPU clock. In x86's case, this is the TSC.
> > >
> > > This is necessary to synchronize bpf traces from host and guest bpf-programs
> > > (after subtracting guest tsc-offset from guest timestamps).
> > 
> > Trying to understand the use case. So in a host-guest scenario,
> > bpf_ktime_get_ns()
> > will return different values in host and guest, but rdtsc() will give
> > the same value.
> > Is this correct?
> 
> No, it will not. 

No, but we can find out the delta between host and guest TSCs.

On x86, you can read the offset through debugfs file:

        debugfs_create_file("tsc-offset", 0444, debugfs_dentry, vcpu,
                            &vcpu_tsc_offset_fops);

Other architectures can expose that offset.

> Also, please explain if any of this stands a chance of
> working for anything other than x86. 

Yes, the same pattern repeats

ARM:

With offset between guest and host:
https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/CNTVCT-EL0--Counter-timer-Virtual-Count-register?lang=en

Without offset:
commit 051ff581ce70e822729e9474941f3c206cbf7436

PPC:
https://yhbt.net/lore/all/5f267a8aec5b8199a580c96ab2b1a3c27de4eb09.camel@gmail.com/T/

(Time Base Register is read through mftb instruction).

> Or even on x86 in the face of
> guest migration.

It won't, but honestly we don't care about tracing at this level across
migration.

> Also, please explain, again, what's wrong with dumping snapshots of
> CLOCK_MONOTONIC{,_RAW} from host and guest and correlating time that
> way?

You can't read the guest and the host clock at the same time (there will always
be some variable delay between reading the two clocks). And that delay
is not fixed, but variable (depending on scheduling of the guest vcpus, 
for example). So you will need an algorithm to estimate their differences, 
with non zero error bounds:

"
 Add a driver with gettime method returning hosts realtime clock.
 This allows Chrony to synchronize host and guest clocks with 
 high precision (see results below).
 
 chronyc> sources
 MS Name/IP address         Stratum Poll Reach LastRx Last sample
 ===============================================================================
 #* PHC0                          0   3   377     6     +4ns[   +4ns] +/-    3ns
"

Now with the hardware clock (which is usually the base for CLOCK_MONOTONIC_RAW),
there are no errors (offset will be 0 ns, rather than 3/4ns).

> And also explain why BPF needs to do this differently than all the other
> tracers.

For x86 we use:

echo "x86-tsc" > /sys/kernel/debug/tracing/trace_clock

For this purpose, on x86, so its not like anything different is being
done?
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ab83c22d274e..832bb1f65f28 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -95,6 +95,7 @@  config X86
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_ZONE_DMA_SET if EXPERT
+	select ARCH_HAS_BPF_RAW_CPU_CLOCK
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/x86/include/asm/bpf_raw_cpu_clock.h b/arch/x86/include/asm/bpf_raw_cpu_clock.h
new file mode 100644
index 000000000000..6951c399819e
--- /dev/null
+++ b/arch/x86/include/asm/bpf_raw_cpu_clock.h
@@ -0,0 +1,10 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_BPF_RAW_CPU_CLOCK_H_
+#define _ASM_X86_BPF_RAW_CPU_CLOCK_H_
+
+static inline unsigned long long read_raw_cpu_clock(void)
+{
+	return rdtsc_ordered();
+}
+
+#endif /* _ASM_X86_BPF_RAW_CPU_CLOCK_H_ */
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 3eff08d7b8e5..844a44ff508d 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -105,6 +105,8 @@  lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
+	case BPF_FUNC_read_raw_cpu_clock:
+		return &bpf_read_raw_cpu_clock_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_prandom_u32:
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d604c8251d88..b6cb426085fb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2058,6 +2058,7 @@  extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
 extern const struct bpf_func_proto bpf_tail_call_proto;
 extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
 extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto;
+extern const struct bpf_func_proto bpf_read_raw_cpu_clock_proto;
 extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
 extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6fc59d61937a..52191791b089 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4037,6 +4037,13 @@  union bpf_attr {
  * 	Return
  * 		Current *ktime*.
  *
+ * u64 bpf_read_raw_cpu_clock(void)
+ * 	Description
+ * 		Return the architecture specific CPU clock value.
+ * 		For x86, this is the TSC clock.
+ * 	Return
+ * 		*CPU clock value*
+ *
  * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
  * 	Description
  * 		**bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
@@ -5089,6 +5096,7 @@  union bpf_attr {
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
+	FN(read_raw_cpu_clock),         \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index a82d6de86522..5815db157220 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -21,6 +21,10 @@  config HAVE_EBPF_JIT
 config ARCH_WANT_DEFAULT_BPF_JIT
 	bool
 
+# Used by archs to tell they support reading raw CPU clock
+config ARCH_HAS_BPF_RAW_CPU_CLOCK
+	bool
+
 menu "BPF subsystem"
 
 config BPF_SYSCALL
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b6c72af64d5d..8e2359dfd582 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2345,6 +2345,8 @@  const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
+const struct bpf_func_proto bpf_read_raw_cpu_clock_proto __weak;
+
 
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1ffd469c217f..90b9e5efaf65 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,10 @@ 
 
 #include "../../lib/kstrtox.h"
 
+#ifdef CONFIG_ARCH_HAS_BPF_RAW_CPU_CLOCK
+#include <asm/bpf_raw_cpu_clock.h>
+#endif
+
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
  * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
@@ -168,6 +172,21 @@  const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_read_raw_cpu_clock)
+{
+#ifdef CONFIG_ARCH_HAS_BPF_RAW_CPU_CLOCK
+	return read_raw_cpu_clock();
+#else
+	return sched_clock();
+#endif
+}
+
+const struct bpf_func_proto bpf_read_raw_cpu_clock_proto = {
+	.func		= bpf_read_raw_cpu_clock,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
 BPF_CALL_0(bpf_ktime_get_coarse_ns)
 {
 	return ktime_get_coarse_ns();
@@ -1366,6 +1385,8 @@  bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ktime_get_boot_ns_proto;
 	case BPF_FUNC_ktime_get_coarse_ns:
 		return &bpf_ktime_get_coarse_ns_proto;
+	case BPF_FUNC_read_raw_cpu_clock:
+		return &bpf_read_raw_cpu_clock_proto;
 	case BPF_FUNC_ringbuf_output:
 		return &bpf_ringbuf_output_proto;
 	case BPF_FUNC_ringbuf_reserve:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6b3153841a33..047ca7c1d57a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1113,6 +1113,8 @@  bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_ktime_get_boot_ns_proto;
 	case BPF_FUNC_ktime_get_coarse_ns:
 		return &bpf_ktime_get_coarse_ns_proto;
+	case BPF_FUNC_read_raw_cpu_clock:
+		return &bpf_read_raw_cpu_clock_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_current_pid_tgid:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6fc59d61937a..52191791b089 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4037,6 +4037,13 @@  union bpf_attr {
  * 	Return
  * 		Current *ktime*.
  *
+ * u64 bpf_read_raw_cpu_clock(void)
+ * 	Description
+ * 		Return the architecture specific CPU clock value.
+ * 		For x86, this is the TSC clock.
+ * 	Return
+ * 		*CPU clock value*
+ *
  * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
  * 	Description
  * 		**bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
@@ -5089,6 +5096,7 @@  union bpf_attr {
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
+	FN(read_raw_cpu_clock),         \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper