diff mbox series

[v3] arm64: Implement prctl(PR_{G,S}ET_TSC)

Message ID 20240824015415.488474-1-pcc@google.com (mailing list archive)
State New, archived
Headers show
Series [v3] arm64: Implement prctl(PR_{G,S}ET_TSC) | expand

Commit Message

Peter Collingbourne Aug. 24, 2024, 1:54 a.m. UTC
On arm64, this prctl controls access to CNTVCT_EL0, CNTVCTSS_EL0 and
CNTFRQ_EL0 via CNTKCTL_EL1.EL0VCTEN. Since this bit is also used to
implement various erratum workarounds, check whether the CPU needs
a workaround whenever we potentially need to change the bit.

This is needed for a correct implementation of non-instrumenting
record-replay debugging on arm64 (i.e. rr; https://rr-project.org/).
rr must trap and record any sources of non-determinism from the
userspace program's perspective so that the execution can be replayed
later. This includes the results of syscalls as well as the results of
accesses to architected timers exposed directly to the program. This prctl
was originally added for x86 by commit 8fb402bccf20 ("generic, x86:
add prctl commands PR_GET_TSC and PR_SET_TSC"), and rr uses it to
trap RDTSC on x86 for the same reason.

We also considered exposing this as a PTRACE_EVENT. However, prctl
seems like a better choice for these reasons:

1) In general an in-process control seems more useful than an
   out-of-process control, since anything that you would be able to
   do with ptrace could also be done with prctl (the tracer can inject
   a call to the prctl and handle signal-delivery-stops), and it avoids
   needing an additional process (which would complicate debugging
   of the ptraced process, since a process cannot have more than one
   tracer, and would be incompatible with ptrace_scope=3) in cases
   where that is not otherwise necessary.

2) Consistency with x86_64. Note that on x86_64, RDTSC has been there
   since the start, so it's the same situation as on arm64.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I233a1867d1ccebe2933a347552e7eae862344421
---
v3:
- rebased to 6.11-rc4
- fixed preemption issue

v2:
- added justification to commit message

 arch/arm64/include/asm/processor.h   |  5 ++
 arch/arm64/include/asm/thread_info.h |  2 +
 arch/arm64/kernel/process.c          | 73 +++++++++++++++++++++++-----
 arch/arm64/kernel/traps.c            | 20 +++++---
 4 files changed, 82 insertions(+), 18 deletions(-)

Comments

Will Deacon Aug. 27, 2024, 3:56 p.m. UTC | #1
On Fri, 23 Aug 2024 18:54:13 -0700, Peter Collingbourne wrote:
> On arm64, this prctl controls access to CNTVCT_EL0, CNTVCTSS_EL0 and
> CNTFRQ_EL0 via CNTKCTL_EL1.EL0VCTEN. Since this bit is also used to
> implement various erratum workarounds, check whether the CPU needs
> a workaround whenever we potentially need to change it.
> 
> This is needed for a correct implementation of non-instrumenting
> record-replay debugging on arm64 (i.e. rr; https://rr-project.org/).
> rr must trap and record any sources of non-determinism from the
> userspace program's perspective so it can be replayed later. This
> includes the results of syscalls as well as the results of access
> to architected timers exposed directly to the program. This prctl
> was originally added for x86 by commit 8fb402bccf20 ("generic, x86:
> add prctl commands PR_GET_TSC and PR_SET_TSC"), and rr uses it to
> trap RDTSC on x86 for the same reason.
> 
> [...]

Applied to arm64 (for-next/timers), thanks!

[1/1] arm64: Implement prctl(PR_{G,S}ET_TSC)
      https://git.kernel.org/arm64/c/3e9e67e12943

Cheers,
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index f77371232d8c6..347bd3464fcbe 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -402,5 +402,10 @@  long get_tagged_addr_ctrl(struct task_struct *task);
 #define GET_TAGGED_ADDR_CTRL()		get_tagged_addr_ctrl(current)
 #endif
 
+int get_tsc_mode(unsigned long adr);
+int set_tsc_mode(unsigned int val);
+#define GET_TSC_CTL(adr)        get_tsc_mode((adr))
+#define SET_TSC_CTL(val)        set_tsc_mode((val))
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e72a3bf9e5634..1114c1c3300a1 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -81,6 +81,7 @@  void arch_setup_new_exec(void);
 #define TIF_SME			27	/* SME in use */
 #define TIF_SME_VL_INHERIT	28	/* Inherit SME vl_onexec across exec */
 #define TIF_KERNEL_FPSTATE	29	/* Task is in a kernel mode FPSIMD section */
+#define TIF_TSC_SIGSEGV		30	/* SIGSEGV on counter-timer access */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
@@ -97,6 +98,7 @@  void arch_setup_new_exec(void);
 #define _TIF_SVE		(1 << TIF_SVE)
 #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_TSC_SIGSEGV	(1 << TIF_TSC_SIGSEGV)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4ae31b7af6c31..1b6bbf839bb5e 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -43,6 +43,7 @@ 
 #include <linux/stacktrace.h>
 
 #include <asm/alternative.h>
+#include <asm/arch_timer.h>
 #include <asm/compat.h>
 #include <asm/cpufeature.h>
 #include <asm/cacheflush.h>
@@ -472,27 +473,52 @@  static void entry_task_switch(struct task_struct *next)
 }
 
 /*
- * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
- * Ensure access is disabled when switching to a 32bit task, ensure
- * access is enabled when switching to a 64bit task.
+ * Handle sysreg updates for ARM erratum 1418040 which affects the 32bit view of
+ * CNTVCT, various other errata which require trapping all CNTVCT{,_EL0}
+ * accesses and prctl(PR_SET_TSC). Ensure access is disabled iff a workaround is
+ * required or PR_TSC_SIGSEGV is set.
  */
-static void erratum_1418040_thread_switch(struct task_struct *next)
+static void update_cntkctl_el1(struct task_struct *next)
 {
-	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
-	    !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
-		return;
+	struct thread_info *ti = task_thread_info(next);
 
-	if (is_compat_thread(task_thread_info(next)))
+	if (test_ti_thread_flag(ti, TIF_TSC_SIGSEGV) ||
+	    has_erratum_handler(read_cntvct_el0) ||
+	    (IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
+	     this_cpu_has_cap(ARM64_WORKAROUND_1418040) &&
+	     is_compat_thread(ti)))
 		sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
 	else
 		sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
 }
 
-static void erratum_1418040_new_exec(void)
+static void cntkctl_thread_switch(struct task_struct *prev,
+				  struct task_struct *next)
+{
+	if ((read_ti_thread_flags(task_thread_info(prev)) &
+	     (_TIF_32BIT | _TIF_TSC_SIGSEGV)) !=
+	    (read_ti_thread_flags(task_thread_info(next)) &
+	     (_TIF_32BIT | _TIF_TSC_SIGSEGV)))
+		update_cntkctl_el1(next);
+}
+
+static int do_set_tsc_mode(unsigned int val)
 {
+	bool tsc_sigsegv;
+
+	if (val == PR_TSC_SIGSEGV)
+		tsc_sigsegv = true;
+	else if (val == PR_TSC_ENABLE)
+		tsc_sigsegv = false;
+	else
+		return -EINVAL;
+
 	preempt_disable();
-	erratum_1418040_thread_switch(current);
+	update_thread_flag(TIF_TSC_SIGSEGV, tsc_sigsegv);
+	update_cntkctl_el1(current);
 	preempt_enable();
+
+	return 0;
 }
 
 /*
@@ -528,7 +554,7 @@  struct task_struct *__switch_to(struct task_struct *prev,
 	contextidr_thread_switch(next);
 	entry_task_switch(next);
 	ssbs_thread_switch(next);
-	erratum_1418040_thread_switch(next);
+	cntkctl_thread_switch(prev, next);
 	ptrauth_thread_switch_user(next);
 
 	/*
@@ -645,7 +671,7 @@  void arch_setup_new_exec(void)
 	current->mm->context.flags = mmflags;
 	ptrauth_thread_init_user();
 	mte_thread_init_user();
-	erratum_1418040_new_exec();
+	do_set_tsc_mode(PR_TSC_ENABLE);
 
 	if (task_spec_ssb_noexec(current)) {
 		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
@@ -754,3 +780,26 @@  int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
 	return prot;
 }
 #endif
+
+int get_tsc_mode(unsigned long adr)
+{
+	unsigned int val;
+
+	if (is_compat_task())
+		return -EINVAL;
+
+	if (test_thread_flag(TIF_TSC_SIGSEGV))
+		val = PR_TSC_SIGSEGV;
+	else
+		val = PR_TSC_ENABLE;
+
+	return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_tsc_mode(unsigned int val)
+{
+	if (is_compat_task())
+		return -EINVAL;
+
+	return do_set_tsc_mode(val);
+}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 9e22683aa9214..baf02ac437f83 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -601,18 +601,26 @@  static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
 
 static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
 {
-	int rt = ESR_ELx_SYS64_ISS_RT(esr);
+	if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+		force_sig(SIGSEGV);
+	} else {
+		int rt = ESR_ELx_SYS64_ISS_RT(esr);
 
-	pt_regs_write_reg(regs, rt, arch_timer_read_counter());
-	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+		pt_regs_write_reg(regs, rt, arch_timer_read_counter());
+		arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+	}
 }
 
 static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
 {
-	int rt = ESR_ELx_SYS64_ISS_RT(esr);
+	if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+		force_sig(SIGSEGV);
+	} else {
+		int rt = ESR_ELx_SYS64_ISS_RT(esr);
 
-	pt_regs_write_reg(regs, rt, arch_timer_get_rate());
-	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+		pt_regs_write_reg(regs, rt, arch_timer_get_rate());
+		arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+	}
 }
 
 static void mrs_handler(unsigned long esr, struct pt_regs *regs)