diff mbox series

[v6,6/8] arm64: KVM: Add interface to set guest value for TRFCR register

Message ID 20240226113044.228403-7-james.clark@arm.com (mailing list archive)
State New, archived
Headers show
Series kvm/coresight: Support exclude guest and exclude host | expand

Commit Message

James Clark Feb. 26, 2024, 11:30 a.m. UTC
Add an interface for the Coresight driver to use to set the value of the
TRFCR register for the guest. This register controls the exclude
settings for trace at different exception levels, and is used to honor
the exclude_host and exclude_guest parameters from the Perf session.
This will be used to later write TRFCR_EL1 on nVHE at guest switch. For
VHE, the host trace is controlled by TRFCR_EL2 and thus we can write to
the TRFCR_EL1 immediately. Because guest writes to the register are
trapped, the value will persist and can't be modified.

Instead of adding a load of infrastructure to share the host's per-cpu
offsets with the hypervisor, just define the new storage as a NR_CPUS
array.

Signed-off-by: James Clark <james.clark@arm.com>
---
 arch/arm64/include/asm/kvm_host.h |  3 +++
 arch/arm64/kernel/image-vars.h    |  1 +
 arch/arm64/kvm/debug.c            | 29 +++++++++++++++++++++++++++++
 3 files changed, 33 insertions(+)

Comments

Marc Zyngier Feb. 26, 2024, 3:01 p.m. UTC | #1
On Mon, 26 Feb 2024 11:30:34 +0000,
James Clark <james.clark@arm.com> wrote:
> 
> Add an interface for the Coresight driver to use to set the value of the
> TRFCR register for the guest. This register controls the exclude

This is not *for* the guest. It is the *host* value while running the
guest.

> settings for trace at different exception levels, and is used to honor
> the exclude_host and exclude_guest parameters from the Perf session.
> This will be used to later write TRFCR_EL1 on nVHE at guest switch. For
> VHE, the host trace is controlled by TRFCR_EL2 and thus we can write to
> the TRFCR_EL1 immediately. Because guest writes to the register are
> trapped, the value will persist and can't be modified.

See?

> 
> Instead of adding a load of infrastructure to share the host's per-cpu
> offsets with the hypervisor, just define the new storage as a NR_CPUS
> array.
> 
> Signed-off-by: James Clark <james.clark@arm.com>
> ---
>  arch/arm64/include/asm/kvm_host.h |  3 +++
>  arch/arm64/kernel/image-vars.h    |  1 +
>  arch/arm64/kvm/debug.c            | 29 +++++++++++++++++++++++++++++
>  3 files changed, 33 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 85b5477bd1b4..56b7f7eca195 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -509,6 +509,7 @@ struct kvm_host_psci_config {
>  	bool psci_0_1_cpu_off_implemented;
>  	bool psci_0_1_migrate_implemented;
>  };
> +extern u64 ____cacheline_aligned kvm_guest_trfcr[NR_CPUS];

Great. So you are making it a guarantee that this is going to
ping-pong on every CPU that accesses this stuff. I'm sure my nVHE 64
core system is going to enjoy it. Not.

Look, we already have some per-CPU context: it's called kvm_host_data,
and we link to it from each and every vcpu. So as long as you're in
the context of a vcpu, you have access to it. Simples. We even have
accessors that pick the correct instance between VHE and (n/h)VHE.

What is wrong with using that?

>  
>  extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config);
>  #define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config)
> @@ -1174,6 +1175,7 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
>  void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
>  void kvm_clr_pmu_events(u32 clr);
>  bool kvm_set_pmuserenr(u64 val);
> +void kvm_etm_set_guest_trfcr(u64 trfcr_guest);
>  #else
>  static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
>  static inline void kvm_clr_pmu_events(u32 clr) {}
> @@ -1181,6 +1183,7 @@ static inline bool kvm_set_pmuserenr(u64 val)
>  {
>  	return false;
>  }
> +static inline void kvm_etm_set_guest_trfcr(u64 trfcr_guest) {}
>  #endif
>  
>  void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 31daa1da191c..fe9e2bd7f43a 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -59,6 +59,7 @@ KVM_NVHE_ALIAS(alt_cb_patch_nops);
>  
>  /* Global kernel state accessed by nVHE hyp code. */
>  KVM_NVHE_ALIAS(kvm_vgic_global_state);
> +KVM_NVHE_ALIAS(kvm_guest_trfcr);
>  
>  /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
>  KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 49a13e72ddd2..fe90bc7d6dd4 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -22,6 +22,7 @@
>  				DBG_MDSCR_MDE)
>  
>  static DEFINE_PER_CPU(u64, mdcr_el2);
> +u64 ____cacheline_aligned kvm_guest_trfcr[NR_CPUS];
>  
>  /*
>   * save/restore_guest_debug_regs
> @@ -359,3 +360,31 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
>  	vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
>  	vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRFCR);
>  }
> +
> +/*
> + * Interface for the Coresight driver to use to set the value of the TRFCR

nit: s/to use//

> + * register for the guest. This register controls the exclude settings for trace

s/for the guest/for *tracing* the guest/

> + * at different exception levels, and is used to honor the exclude_host and
> + * exclude_guest parameters from the Perf session.
> + *
> + * This will be used to later write TRFCR_EL1 on nVHE at guest switch. For VHE,
> + * the host trace is controlled by TRFCR_EL2 and thus we can write to the

s/to the/to/

> + * TRFCR_EL1 immediately. Because guest writes to the register are trapped, the
> + * value will persist and can't be modified. For pKVM, kvm_guest_trfcr can't
> + * be read by the hypervisor, so don't bother writing it.

I don't know what you mean by "can't be read". Because controlling all
of the EL1 memory is not enough?

> + */
> +void kvm_etm_set_guest_trfcr(u64 trfcr_guest)
> +{
> +	if (WARN_ON_ONCE(!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
> +							       ID_AA64DFR0_EL1_TraceFilt_SHIFT)))
> +		return;
> +
> +	/* Warn in invalid use of smp_processor_id() */
> +	WARN_ON_ONCE(preemptible());

What does it buy us to WARN, but continue to do the *wrong* thing?

> +
> +	if (has_vhe())
> +		write_sysreg_s(trfcr_guest, SYS_TRFCR_EL12);

Please use write_sysreg_el1() instead.

> +	else if (!is_protected_kvm_enabled())
> +		kvm_guest_trfcr[smp_processor_id()] = trfcr_guest;
> +}
> +EXPORT_SYMBOL_GPL(kvm_etm_set_guest_trfcr);

Thanks,

	M.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 85b5477bd1b4..56b7f7eca195 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -509,6 +509,7 @@  struct kvm_host_psci_config {
 	bool psci_0_1_cpu_off_implemented;
 	bool psci_0_1_migrate_implemented;
 };
+extern u64 ____cacheline_aligned kvm_guest_trfcr[NR_CPUS];
 
 extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config);
 #define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config)
@@ -1174,6 +1175,7 @@  void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
 void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
 void kvm_clr_pmu_events(u32 clr);
 bool kvm_set_pmuserenr(u64 val);
+void kvm_etm_set_guest_trfcr(u64 trfcr_guest);
 #else
 static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u32 clr) {}
@@ -1181,6 +1183,7 @@  static inline bool kvm_set_pmuserenr(u64 val)
 {
 	return false;
 }
+static inline void kvm_etm_set_guest_trfcr(u64 trfcr_guest) {}
 #endif
 
 void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 31daa1da191c..fe9e2bd7f43a 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -59,6 +59,7 @@  KVM_NVHE_ALIAS(alt_cb_patch_nops);
 
 /* Global kernel state accessed by nVHE hyp code. */
 KVM_NVHE_ALIAS(kvm_vgic_global_state);
+KVM_NVHE_ALIAS(kvm_guest_trfcr);
 
 /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
 KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 49a13e72ddd2..fe90bc7d6dd4 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -22,6 +22,7 @@ 
 				DBG_MDSCR_MDE)
 
 static DEFINE_PER_CPU(u64, mdcr_el2);
+u64 ____cacheline_aligned kvm_guest_trfcr[NR_CPUS];
 
 /*
  * save/restore_guest_debug_regs
@@ -359,3 +360,31 @@  void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
 	vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
 	vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRFCR);
 }
+
+/*
+ * Interface for the Coresight driver to use to set the value of the TRFCR
+ * register for the guest. This register controls the exclude settings for trace
+ * at different exception levels, and is used to honor the exclude_host and
+ * exclude_guest parameters from the Perf session.
+ *
+ * This will be used to later write TRFCR_EL1 on nVHE at guest switch. For VHE,
+ * the host trace is controlled by TRFCR_EL2 and thus we can write to the
+ * TRFCR_EL1 immediately. Because guest writes to the register are trapped, the
+ * value will persist and can't be modified. For pKVM, kvm_guest_trfcr can't
+ * be read by the hypervisor, so don't bother writing it.
+ */
+void kvm_etm_set_guest_trfcr(u64 trfcr_guest)
+{
+	if (WARN_ON_ONCE(!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
+							       ID_AA64DFR0_EL1_TraceFilt_SHIFT)))
+		return;
+
+	/* Warn in invalid use of smp_processor_id() */
+	WARN_ON_ONCE(preemptible());
+
+	if (has_vhe())
+		write_sysreg_s(trfcr_guest, SYS_TRFCR_EL12);
+	else if (!is_protected_kvm_enabled())
+		kvm_guest_trfcr[smp_processor_id()] = trfcr_guest;
+}
+EXPORT_SYMBOL_GPL(kvm_etm_set_guest_trfcr);