diff mbox series

[06/13] KVM: arm64: Add clock support in the nVHE hyp

Message ID 20240911093029.3279154-7-vdonnefort@google.com (mailing list archive)
State New
Delegated to: Steven Rostedt
Headers show
Series Tracefs support for pKVM | expand

Commit Message

Vincent Donnefort Sept. 11, 2024, 9:30 a.m. UTC
By default, the arm64 host kernel is using the arch timer as a source
for sched_clock. Conveniently, EL2 has access to that same counter,
allowing to generate clock values that are synchronized.

The clock needs nonetheless to be setup with the same slope values as
the kernel. Introducing at the same time trace_clock() which is expected
to be later configured by the hypervisor tracing.

Signed-off-by: Vincent Donnefort <vdonnefort@google.com>

Comments

John Stultz Sept. 13, 2024, 10:41 p.m. UTC | #1
On Wed, Sep 11, 2024 at 2:31 AM 'Vincent Donnefort' via kernel-team
<kernel-team@android.com> wrote:
>
> By default, the arm64 host kernel is using the arch timer as a source
> for sched_clock. Conveniently, EL2 has access to that same counter,
> allowing to generate clock values that are synchronized.
>
> The clock needs nonetheless to be setup with the same slope values as
> the kernel. Introducing at the same time trace_clock() which is expected
> to be later configured by the hypervisor tracing.
>
> Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
>
> diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> index c838309e4ec4..355bae0056f0 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -144,5 +144,4 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
>  extern unsigned long kvm_nvhe_sym(__icache_flags);
>  extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
>  extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
> -
>  #endif /* __ARM64_KVM_HYP_H__ */
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/clock.h b/arch/arm64/kvm/hyp/include/nvhe/clock.h
> new file mode 100644
> index 000000000000..2bd05b3b89f9
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/include/nvhe/clock.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ARM64_KVM_HYP_NVHE_CLOCK_H
> +#define __ARM64_KVM_HYP_NVHE_CLOCK_H
> +#include <linux/types.h>
> +
> +#include <asm/kvm_hyp.h>
> +
> +#ifdef CONFIG_TRACING
> +void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
> +u64 trace_clock(void);
> +#else
> +static inline void
> +trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
> +static inline u64 trace_clock(void) { return 0; }
> +#endif
> +#endif
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> index b43426a493df..323e992089bd 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -28,6 +28,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
>  hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
>          ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
>  hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
> +hyp-obj-$(CONFIG_TRACING) += clock.o
>  hyp-obj-y += $(lib-objs)
>
>  ##
> diff --git a/arch/arm64/kvm/hyp/nvhe/clock.c b/arch/arm64/kvm/hyp/nvhe/clock.c
> new file mode 100644
> index 000000000000..0d1f74bc2e11
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/nvhe/clock.c
> @@ -0,0 +1,49 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Google LLC
> + * Author: Vincent Donnefort <vdonnefort@google.com>
> + */
> +
> +#include <nvhe/clock.h>
> +
> +#include <asm/arch_timer.h>
> +#include <asm/div64.h>
> +
> +static struct clock_data {
> +       struct {
> +               u32 mult;
> +               u32 shift;
> +               u64 epoch_ns;
> +               u64 epoch_cyc;
> +       } data[2];
> +       u64 cur;
> +} trace_clock_data;
> +
> +/* Does not guarantee no reader on the modified bank. */
> +void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
> +{
> +       struct clock_data *clock = &trace_clock_data;
> +       u64 bank = clock->cur ^ 1;
> +
> +       clock->data[bank].mult          = mult;
> +       clock->data[bank].shift         = shift;
> +       clock->data[bank].epoch_ns      = epoch_ns;
> +       clock->data[bank].epoch_cyc     = epoch_cyc;
> +
> +       smp_store_release(&clock->cur, bank);
> +}

Can't see from the context in this patch how it's called, but with
timekeeping there can be multiple updaters (settimeofday, timer tick,
etc).
So would it be smart to have some serialization here to avoid you
don't get parallel updates here?

> +
> +/* Using host provided data. Do not use for anything else than debugging. */
> +u64 trace_clock(void)
> +{
> +       struct clock_data *clock = &trace_clock_data;
> +       u64 bank = smp_load_acquire(&clock->cur);
> +       u64 cyc, ns;
> +
> +       cyc = __arch_counter_get_cntpct() - clock->data[bank].epoch_cyc;
> +
> +       ns = cyc * clock->data[bank].mult;
> +       ns >>= clock->data[bank].shift;
> +
> +       return (u64)ns + clock->data[bank].epoch_ns;
> +}

You might want some overflow protection on the mult?  See the
max_cycles value we use in timekeeping_cycles_to_ns()

-john
Vincent Donnefort Sept. 16, 2024, 12:26 p.m. UTC | #2
[...]

> > +static struct clock_data {
> > +       struct {
> > +               u32 mult;
> > +               u32 shift;
> > +               u64 epoch_ns;
> > +               u64 epoch_cyc;
> > +       } data[2];
> > +       u64 cur;
> > +} trace_clock_data;
> > +
> > +/* Does not guarantee no reader on the modified bank. */
> > +void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
> > +{
> > +       struct clock_data *clock = &trace_clock_data;
> > +       u64 bank = clock->cur ^ 1;
> > +
> > +       clock->data[bank].mult          = mult;
> > +       clock->data[bank].shift         = shift;
> > +       clock->data[bank].epoch_ns      = epoch_ns;
> > +       clock->data[bank].epoch_cyc     = epoch_cyc;
> > +
> > +       smp_store_release(&clock->cur, bank);
> > +}
> 
> Can't see from the context in this patch how it's called, but with
> timekeeping there can be multiple updaters (settimeofday, timer tick,
> etc).
> So would it be smart to have some serialization here to avoid you
> don't get parallel updates here?

Yeah, it is serialized later by the trace_rb_lock spinlock. 
 
> 
> > +
> > +/* Using host provided data. Do not use for anything else than debugging. */
> > +u64 trace_clock(void)
> > +{
> > +       struct clock_data *clock = &trace_clock_data;
> > +       u64 bank = smp_load_acquire(&clock->cur);
> > +       u64 cyc, ns;
> > +
> > +       cyc = __arch_counter_get_cntpct() - clock->data[bank].epoch_cyc;
> > +
> > +       ns = cyc * clock->data[bank].mult;
> > +       ns >>= clock->data[bank].shift;
> > +
> > +       return (u64)ns + clock->data[bank].epoch_ns;
> > +}
> 
> You might want some overflow protection on the mult?  See the
> max_cycles value we use in timekeeping_cycles_to_ns()

In the RFC, I was doing a 128-bits mult. Now as I have the __hyp_clock_work() in
the kernel that keeps the epoch up-to-date, I do not expect this overflow ever.

But then I could combine both to fallback on a slower 128-bits in case the
64-bits overflowed.

> 
> -john
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c838309e4ec4..355bae0056f0 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -144,5 +144,4 @@  extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
 extern unsigned long kvm_nvhe_sym(__icache_flags);
 extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
 extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
-
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/clock.h b/arch/arm64/kvm/hyp/include/nvhe/clock.h
new file mode 100644
index 000000000000..2bd05b3b89f9
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/clock.h
@@ -0,0 +1,16 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_KVM_HYP_NVHE_CLOCK_H
+#define __ARM64_KVM_HYP_NVHE_CLOCK_H
+#include <linux/types.h>
+
+#include <asm/kvm_hyp.h>
+
+#ifdef CONFIG_TRACING
+void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
+u64 trace_clock(void);
+#else
+static inline void
+trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
+static inline u64 trace_clock(void) { return 0; }
+#endif
+#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index b43426a493df..323e992089bd 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -28,6 +28,7 @@  hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
 hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
+hyp-obj-$(CONFIG_TRACING) += clock.o
 hyp-obj-y += $(lib-objs)
 
 ##
diff --git a/arch/arm64/kvm/hyp/nvhe/clock.c b/arch/arm64/kvm/hyp/nvhe/clock.c
new file mode 100644
index 000000000000..0d1f74bc2e11
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/clock.c
@@ -0,0 +1,49 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ * Author: Vincent Donnefort <vdonnefort@google.com>
+ */
+
+#include <nvhe/clock.h>
+
+#include <asm/arch_timer.h>
+#include <asm/div64.h>
+
+static struct clock_data {
+	struct {
+		u32 mult;
+		u32 shift;
+		u64 epoch_ns;
+		u64 epoch_cyc;
+	} data[2];
+	u64 cur;
+} trace_clock_data;
+
+/* Does not guarantee no reader on the modified bank. */
+void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
+{
+	struct clock_data *clock = &trace_clock_data;
+	u64 bank = clock->cur ^ 1;
+
+	clock->data[bank].mult		= mult;
+	clock->data[bank].shift		= shift;
+	clock->data[bank].epoch_ns	= epoch_ns;
+	clock->data[bank].epoch_cyc	= epoch_cyc;
+
+	smp_store_release(&clock->cur, bank);
+}
+
+/* Using host provided data. Do not use for anything else than debugging. */
+u64 trace_clock(void)
+{
+	struct clock_data *clock = &trace_clock_data;
+	u64 bank = smp_load_acquire(&clock->cur);
+	u64 cyc, ns;
+
+	cyc = __arch_counter_get_cntpct() - clock->data[bank].epoch_cyc;
+
+	ns = cyc * clock->data[bank].mult;
+	ns >>= clock->data[bank].shift;
+
+	return (u64)ns + clock->data[bank].epoch_ns;
+}