
[v3,3/3] arm64: entry: Convert to generic entry

Message ID 20240629085601.470241-4-ruanjinjie@huawei.com (mailing list archive)
State New, archived
Series arm64: entry: Convert to generic entry

Commit Message

Jinjie Ruan June 29, 2024, 8:56 a.m. UTC
Currently, x86, RISC-V and LoongArch use the generic entry code. Convert
arm64 to use the generic entry infrastructure from kernel/entry/*. The
generic entry code makes maintainers' work easier and the code more
elegant, and it also removes 150+ lines of duplicated code. The changes
are as follows:

 - Remove the TIF_SYSCALL_* flags, _TIF_WORK_MASK and _TIF_SYSCALL_WORK.

 - Remove syscall_trace_enter/exit() and use the generic ones.

 - Remove *enter_from/exit_to_kernel_mode(), and wrap with generic
   irqentry_enter/exit().

 - Remove *enter_from/exit_to_user_mode(), and wrap with generic
   irqentry_enter_from/exit_to_user_mode().

 - Remove arm64_enter/exit_nmi() and use generic irqentry_nmi_enter/exit().

 - Remove the PREEMPT_DYNAMIC code, as the generic entry code handles it
   once arch_irqentry_exit_need_resched() is implemented.

Tested OK with the following test cases on QEMU Cortex-A53 and HiSilicon
Kunpeng-920:

 - Run `perf top` command

 - Switch between the different `dynamic preempt` modes

 - Use `pseudo nmi`

 - stress-ng CPU stress test.

 - The MTE test case in Documentation/arch/arm64/memory-tagging-extension.rst
   and all test cases in tools/testing/selftests/arm64/mte/* (QEMU only).

Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Tested-by: Jinjie Ruan <ruanjinjie@huawei.com>
Tested-by: Kees Cook <kees@kernel.org>
---
v3:
- Tested with the MTE test cases.
- Handle forget_syscall() in arch_post_report_syscall_entry().
- Rebased on the previous patch and moved the arch funcs to entry-common.h.
- Change __always_inline() to inline for arch_enter_from_kernel_mode() and
  arch_exit_to_kernel_mode_prepare(), and add inline for the other arch funcs.
- Remove the unused signal.h include from entry-common.h.

v2:
- Add Tested-by.
- Rebased on the refactored report_syscall() code.
- Add comment for arch_prepare_report_syscall_exit().
- Update the commit message.
- Adjust entry-common.h header file inclusion to alphabetical order.
---
 arch/arm64/Kconfig                    |   1 +
 arch/arm64/include/asm/entry-common.h | 172 ++++++++++++
 arch/arm64/include/asm/syscall.h      |   6 +-
 arch/arm64/include/asm/thread_info.h  |  23 +-
 arch/arm64/kernel/entry-common.c      | 368 +++++---------------------
 arch/arm64/kernel/ptrace.c            | 101 -------
 arch/arm64/kernel/signal.c            |   3 +-
 arch/arm64/kernel/syscall.c           |  18 +-
 8 files changed, 264 insertions(+), 428 deletions(-)
 create mode 100644 arch/arm64/include/asm/entry-common.h

Comments

Jinjie Ruan July 23, 2024, 1:39 a.m. UTC | #1
Gentle ping.

On 2024/6/29 16:56, Jinjie Ruan wrote:
> Currently, x86, RISC-V and LoongArch use the generic entry code. Convert
> arm64 to use the generic entry infrastructure from kernel/entry/*. The
> generic entry code makes maintainers' work easier and the code more
> elegant, and it also removes 150+ lines of duplicated code. The changes
> are as follows:
> 
>  - Remove the TIF_SYSCALL_* flags, _TIF_WORK_MASK and _TIF_SYSCALL_WORK.
> 
>  - Remove syscall_trace_enter/exit() and use the generic ones.
> 
>  - Remove *enter_from/exit_to_kernel_mode(), and wrap with generic
>    irqentry_enter/exit().
> 
>  - Remove *enter_from/exit_to_user_mode(), and wrap with generic
>    irqentry_enter_from/exit_to_user_mode().
> 
>  - Remove arm64_enter/exit_nmi() and use generic irqentry_nmi_enter/exit().
> 
>  - Remove the PREEMPT_DYNAMIC code, as the generic entry code handles it
>    once arch_irqentry_exit_need_resched() is implemented.
> 
> Tested OK with the following test cases on QEMU Cortex-A53 and HiSilicon
> Kunpeng-920:
> 
>  - Run `perf top` command
> 
>  - Switch between the different `dynamic preempt` modes
> 
>  - Use `pseudo nmi`
> 
>  - stress-ng CPU stress test.
> 
>  - The MTE test case in Documentation/arch/arm64/memory-tagging-extension.rst
>    and all test cases in tools/testing/selftests/arm64/mte/* (QEMU only).
> 
> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> Tested-by: Jinjie Ruan <ruanjinjie@huawei.com>
> Tested-by: Kees Cook <kees@kernel.org>
> ---
> v3:
> - Tested with the MTE test cases.
> - Handle forget_syscall() in arch_post_report_syscall_entry().
> - Rebased on the previous patch and moved the arch funcs to entry-common.h.
> - Change __always_inline() to inline for arch_enter_from_kernel_mode() and
>   arch_exit_to_kernel_mode_prepare(), and add inline for the other arch funcs.
> - Remove the unused signal.h include from entry-common.h.
> 
> v2:
> - Add Tested-by.
> - Rebased on the refactored report_syscall() code.
> - Add comment for arch_prepare_report_syscall_exit().
> - Update the commit message.
> - Adjust entry-common.h header file inclusion to alphabetical order.
> ---
>  arch/arm64/Kconfig                    |   1 +
>  arch/arm64/include/asm/entry-common.h | 172 ++++++++++++
>  arch/arm64/include/asm/syscall.h      |   6 +-
>  arch/arm64/include/asm/thread_info.h  |  23 +-
>  arch/arm64/kernel/entry-common.c      | 368 +++++---------------------
>  arch/arm64/kernel/ptrace.c            | 101 -------
>  arch/arm64/kernel/signal.c            |   3 +-
>  arch/arm64/kernel/syscall.c           |  18 +-
>  8 files changed, 264 insertions(+), 428 deletions(-)
>  create mode 100644 arch/arm64/include/asm/entry-common.h
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5d91259ee7b5..e6ccc5ea06fe 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -138,6 +138,7 @@ config ARM64
>  	select GENERIC_CPU_DEVICES
>  	select GENERIC_CPU_VULNERABILITIES
>  	select GENERIC_EARLY_IOREMAP
> +	select GENERIC_ENTRY
>  	select GENERIC_IDLE_POLL_SETUP
>  	select GENERIC_IOREMAP
>  	select GENERIC_IRQ_IPI
> diff --git a/arch/arm64/include/asm/entry-common.h b/arch/arm64/include/asm/entry-common.h
> new file mode 100644
> index 000000000000..a14d62b2eb69
> --- /dev/null
> +++ b/arch/arm64/include/asm/entry-common.h
> @@ -0,0 +1,172 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _ASM_ARM64_ENTRY_COMMON_H
> +#define _ASM_ARM64_ENTRY_COMMON_H
> +
> +#include <linux/thread_info.h>
> +
> +#include <asm/daifflags.h>
> +#include <asm/fpsimd.h>
> +#include <asm/mte.h>
> +#include <asm/stacktrace.h>
> +
> +enum ptrace_syscall_dir {
> +	PTRACE_SYSCALL_ENTER = 0,
> +	PTRACE_SYSCALL_EXIT,
> +};
> +
> +#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_MTE_ASYNC_FAULT | _TIF_FOREIGN_FPSTATE)
> +
> +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
> +{
> +	mte_disable_tco_entry(current);
> +}
> +
> +#define arch_enter_from_user_mode arch_enter_from_user_mode
> +
> +static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
> +							unsigned long ti_work)
> +{
> +	if (ti_work & _TIF_MTE_ASYNC_FAULT) {
> +		clear_thread_flag(TIF_MTE_ASYNC_FAULT);
> +		send_sig_fault(SIGSEGV, SEGV_MTEAERR, (void __user *)NULL, current);
> +	}
> +
> +	if (ti_work & _TIF_FOREIGN_FPSTATE)
> +		fpsimd_restore_current_state();
> +}
> +
> +#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work
> +
> +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
> +						  unsigned long ti_work)
> +{
> +	local_daif_mask();
> +	mte_check_tfsr_exit();
> +}
> +
> +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
> +
> +static inline void arch_enter_from_kernel_mode(struct pt_regs *regs)
> +{
> +	mte_check_tfsr_entry();
> +	mte_disable_tco_entry(current);
> +}
> +
> +#define arch_enter_from_kernel_mode arch_enter_from_kernel_mode
> +
> +static inline void arch_exit_to_kernel_mode_prepare(struct pt_regs *regs)
> +{
> +	mte_check_tfsr_exit();
> +}
> +
> +#define arch_exit_to_kernel_mode_prepare arch_exit_to_kernel_mode_prepare
> +
> +static inline bool arch_irqentry_exit_need_resched(void)
> +{
> +	/*
> +	 * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
> +	 * priority masking is used the GIC irqchip driver will clear DAIF.IF
> +	 * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
> +	 * DAIF we must have handled an NMI, so skip preemption.
> +	 */
> +	if (system_uses_irq_prio_masking() && read_sysreg(daif))
> +		return false;
> +
> +	/*
> +	 * Preempting a task from an IRQ means we leave copies of PSTATE
> +	 * on the stack. cpufeature's enable calls may modify PSTATE, but
> +	 * resuming one of these preempted tasks would undo those changes.
> +	 *
> +	 * Only allow a task to be preempted once cpufeatures have been
> +	 * enabled.
> +	 */
> +	if (!system_capabilities_finalized())
> +		return false;
> +
> +	return true;
> +}
> +
> +#define arch_irqentry_exit_need_resched arch_irqentry_exit_need_resched
> +
> +static inline unsigned long arch_prepare_report_syscall_entry(struct pt_regs *regs)
> +{
> +	unsigned long saved_reg;
> +	int regno;
> +
> +	/*
> +	 * We have some ABI weirdness here in the way that we handle syscall
> +	 * exit stops because we indicate whether or not the stop has been
> +	 * signalled from syscall entry or syscall exit by clobbering a general
> +	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
> +	 * and restoring its old value after the stop. This means that:
> +	 *
> +	 * - Any writes by the tracer to this register during the stop are
> +	 *   ignored/discarded.
> +	 *
> +	 * - The actual value of the register is not available during the stop,
> +	 *   so the tracer cannot save it and restore it later.
> +	 *
> +	 * - Syscall stops behave differently to seccomp and pseudo-step traps
> +	 *   (the latter do not nobble any registers).
> +	 */
> +	regno = (is_compat_task() ? 12 : 7);
> +	saved_reg = regs->regs[regno];
> +	regs->regs[regno] = PTRACE_SYSCALL_ENTER;
> +
> +	return saved_reg;
> +}
> +
> +#define arch_prepare_report_syscall_entry arch_prepare_report_syscall_entry
> +
> +static inline void arch_post_report_syscall_entry(struct pt_regs *regs,
> +						  unsigned long saved_reg, long ret)
> +{
> +	int regno = (is_compat_task() ? 12 : 7);
> +
> +	if (ret)
> +		forget_syscall(regs);
> +
> +	regs->regs[regno] = saved_reg;
> +}
> +
> +#define arch_post_report_syscall_entry arch_post_report_syscall_entry
> +
> +static inline unsigned long arch_prepare_report_syscall_exit(struct pt_regs *regs,
> +							     unsigned long work)
> +{
> +	unsigned long saved_reg;
> +	int regno;
> +
> +	/* See comment for arch_prepare_report_syscall_entry() */
> +	regno = (is_compat_task() ? 12 : 7);
> +	saved_reg = regs->regs[regno];
> +	regs->regs[regno] = PTRACE_SYSCALL_EXIT;
> +
> +	if (report_single_step(work)) {
> +		/*
> +		 * Signal a pseudo-step exception since we are stepping but
> +		 * tracer modifications to the registers may have rewound the
> +		 * state machine.
> +		 */
> +		regs->regs[regno] = saved_reg;
> +	}
> +
> +	return saved_reg;
> +}
> +
> +#define arch_prepare_report_syscall_exit arch_prepare_report_syscall_exit
> +
> +static inline void arch_post_report_syscall_exit(struct pt_regs *regs,
> +						 unsigned long saved_reg,
> +						 unsigned long work)
> +{
> +	int regno = (is_compat_task() ? 12 : 7);
> +
> +	if (!report_single_step(work))
> +		regs->regs[regno] = saved_reg;
> +}
> +
> +#define arch_post_report_syscall_exit arch_post_report_syscall_exit
> +
> +#endif /* _ASM_ARM64_ENTRY_COMMON_H */
> diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
> index ab8e14b96f68..9891b15da4c3 100644
> --- a/arch/arm64/include/asm/syscall.h
> +++ b/arch/arm64/include/asm/syscall.h
> @@ -85,7 +85,9 @@ static inline int syscall_get_arch(struct task_struct *task)
>  	return AUDIT_ARCH_AARCH64;
>  }
>  
> -int syscall_trace_enter(struct pt_regs *regs);
> -void syscall_trace_exit(struct pt_regs *regs);
> +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
> +{
> +	return false;
> +}
>  
>  #endif	/* __ASM_SYSCALL_H */
> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
> index e72a3bf9e563..ec5d74c53bf9 100644
> --- a/arch/arm64/include/asm/thread_info.h
> +++ b/arch/arm64/include/asm/thread_info.h
> @@ -43,6 +43,7 @@ struct thread_info {
>  	void			*scs_sp;
>  #endif
>  	u32			cpu;
> +	unsigned long		syscall_work;   /* SYSCALL_WORK_ flags */
>  };
>  
>  #define thread_saved_pc(tsk)	\
> @@ -64,11 +65,6 @@ void arch_setup_new_exec(void);
>  #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
>  #define TIF_MTE_ASYNC_FAULT	5	/* MTE Asynchronous Tag Check Fault */
>  #define TIF_NOTIFY_SIGNAL	6	/* signal notifications exist */
> -#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
> -#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
> -#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
> -#define TIF_SECCOMP		11	/* syscall secure computing */
> -#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
>  #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
>  #define TIF_FREEZE		19
>  #define TIF_RESTORE_SIGMASK	20
> @@ -86,27 +82,12 @@ void arch_setup_new_exec(void);
>  #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
>  #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
>  #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
> -#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
> -#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
> -#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
> -#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
> -#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
> -#define _TIF_UPROBE		(1 << TIF_UPROBE)
> -#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
> +#define _TIF_UPROBE            (1 << TIF_UPROBE)
>  #define _TIF_32BIT		(1 << TIF_32BIT)
>  #define _TIF_SVE		(1 << TIF_SVE)
>  #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
>  #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
>  
> -#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
> -				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
> -				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
> -				 _TIF_NOTIFY_SIGNAL)
> -
> -#define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
> -				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
> -				 _TIF_SYSCALL_EMU)
> -
>  #ifdef CONFIG_SHADOW_CALL_STACK
>  #define INIT_SCS							\
>  	.scs_base	= init_shadow_call_stack,			\
> diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
> index b77a15955f28..ac61aeb93933 100644
> --- a/arch/arm64/kernel/entry-common.c
> +++ b/arch/arm64/kernel/entry-common.c
> @@ -14,6 +14,7 @@
>  #include <linux/sched.h>
>  #include <linux/sched/debug.h>
>  #include <linux/thread_info.h>
> +#include <linux/entry-common.h>
>  
>  #include <asm/cpufeature.h>
>  #include <asm/daifflags.h>
> @@ -28,201 +29,15 @@
>  #include <asm/sysreg.h>
>  #include <asm/system_misc.h>
>  
> -/*
> - * Handle IRQ/context state management when entering from kernel mode.
> - * Before this function is called it is not safe to call regular kernel code,
> - * instrumentable code, or any code which may trigger an exception.
> - *
> - * This is intended to match the logic in irqentry_enter(), handling the kernel
> - * mode transitions only.
> - */
> -static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
> -{
> -	regs->exit_rcu = false;
> -
> -	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
> -		lockdep_hardirqs_off(CALLER_ADDR0);
> -		ct_irq_enter();
> -		trace_hardirqs_off_finish();
> -
> -		regs->exit_rcu = true;
> -		return;
> -	}
> -
> -	lockdep_hardirqs_off(CALLER_ADDR0);
> -	rcu_irq_enter_check_tick();
> -	trace_hardirqs_off_finish();
> -}
> -
> -static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
> -{
> -	__enter_from_kernel_mode(regs);
> -	mte_check_tfsr_entry();
> -	mte_disable_tco_entry(current);
> -}
> -
> -/*
> - * Handle IRQ/context state management when exiting to kernel mode.
> - * After this function returns it is not safe to call regular kernel code,
> - * instrumentable code, or any code which may trigger an exception.
> - *
> - * This is intended to match the logic in irqentry_exit(), handling the kernel
> - * mode transitions only, and with preemption handled elsewhere.
> - */
> -static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
> -{
> -	lockdep_assert_irqs_disabled();
> -
> -	if (interrupts_enabled(regs)) {
> -		if (regs->exit_rcu) {
> -			trace_hardirqs_on_prepare();
> -			lockdep_hardirqs_on_prepare();
> -			ct_irq_exit();
> -			lockdep_hardirqs_on(CALLER_ADDR0);
> -			return;
> -		}
> -
> -		trace_hardirqs_on();
> -	} else {
> -		if (regs->exit_rcu)
> -			ct_irq_exit();
> -	}
> -}
> -
> -static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
> -{
> -	mte_check_tfsr_exit();
> -	__exit_to_kernel_mode(regs);
> -}
> -
> -/*
> - * Handle IRQ/context state management when entering from user mode.
> - * Before this function is called it is not safe to call regular kernel code,
> - * instrumentable code, or any code which may trigger an exception.
> - */
> -static __always_inline void __enter_from_user_mode(void)
> +static __always_inline void exit_to_user_mode_wrapper(struct pt_regs *regs)
>  {
> -	lockdep_hardirqs_off(CALLER_ADDR0);
> -	CT_WARN_ON(ct_state() != CONTEXT_USER);
> -	user_exit_irqoff();
> -	trace_hardirqs_off_finish();
> -	mte_disable_tco_entry(current);
> -}
> -
> -static __always_inline void enter_from_user_mode(struct pt_regs *regs)
> -{
> -	__enter_from_user_mode();
> -}
> -
> -/*
> - * Handle IRQ/context state management when exiting to user mode.
> - * After this function returns it is not safe to call regular kernel code,
> - * instrumentable code, or any code which may trigger an exception.
> - */
> -static __always_inline void __exit_to_user_mode(void)
> -{
> -	trace_hardirqs_on_prepare();
> -	lockdep_hardirqs_on_prepare();
> -	user_enter_irqoff();
> -	lockdep_hardirqs_on(CALLER_ADDR0);
> -}
> -
> -static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
> -{
> -	do {
> -		local_irq_enable();
> -
> -		if (thread_flags & _TIF_NEED_RESCHED)
> -			schedule();
> -
> -		if (thread_flags & _TIF_UPROBE)
> -			uprobe_notify_resume(regs);
> -
> -		if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
> -			clear_thread_flag(TIF_MTE_ASYNC_FAULT);
> -			send_sig_fault(SIGSEGV, SEGV_MTEAERR,
> -				       (void __user *)NULL, current);
> -		}
> -
> -		if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
> -			do_signal(regs);
> -
> -		if (thread_flags & _TIF_NOTIFY_RESUME)
> -			resume_user_mode_work(regs);
> -
> -		if (thread_flags & _TIF_FOREIGN_FPSTATE)
> -			fpsimd_restore_current_state();
> -
> -		local_irq_disable();
> -		thread_flags = read_thread_flags();
> -	} while (thread_flags & _TIF_WORK_MASK);
> -}
> -
> -static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
> -{
> -	unsigned long flags;
> -
>  	local_irq_disable();
> -
> -	flags = read_thread_flags();
> -	if (unlikely(flags & _TIF_WORK_MASK))
> -		do_notify_resume(regs, flags);
> -
> -	local_daif_mask();
> -
> -	lockdep_sys_exit();
> -}
> -
> -static __always_inline void exit_to_user_mode(struct pt_regs *regs)
> -{
> -	exit_to_user_mode_prepare(regs);
> -	mte_check_tfsr_exit();
> -	__exit_to_user_mode();
> +	irqentry_exit_to_user_mode(regs);
>  }
>  
>  asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
>  {
> -	exit_to_user_mode(regs);
> -}
> -
> -/*
> - * Handle IRQ/context state management when entering an NMI from user/kernel
> - * mode. Before this function is called it is not safe to call regular kernel
> - * code, instrumentable code, or any code which may trigger an exception.
> - */
> -static void noinstr arm64_enter_nmi(struct pt_regs *regs)
> -{
> -	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
> -
> -	__nmi_enter();
> -	lockdep_hardirqs_off(CALLER_ADDR0);
> -	lockdep_hardirq_enter();
> -	ct_nmi_enter();
> -
> -	trace_hardirqs_off_finish();
> -	ftrace_nmi_enter();
> -}
> -
> -/*
> - * Handle IRQ/context state management when exiting an NMI from user/kernel
> - * mode. After this function returns it is not safe to call regular kernel
> - * code, instrumentable code, or any code which may trigger an exception.
> - */
> -static void noinstr arm64_exit_nmi(struct pt_regs *regs)
> -{
> -	bool restore = regs->lockdep_hardirqs;
> -
> -	ftrace_nmi_exit();
> -	if (restore) {
> -		trace_hardirqs_on_prepare();
> -		lockdep_hardirqs_on_prepare();
> -	}
> -
> -	ct_nmi_exit();
> -	lockdep_hardirq_exit();
> -	if (restore)
> -		lockdep_hardirqs_on(CALLER_ADDR0);
> -	__nmi_exit();
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  /*
> @@ -259,48 +74,6 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
>  		lockdep_hardirqs_on(CALLER_ADDR0);
>  }
>  
> -#ifdef CONFIG_PREEMPT_DYNAMIC
> -DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
> -#define need_irq_preemption() \
> -	(static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
> -#else
> -#define need_irq_preemption()	(IS_ENABLED(CONFIG_PREEMPTION))
> -#endif
> -
> -static void __sched arm64_preempt_schedule_irq(void)
> -{
> -	if (!need_irq_preemption())
> -		return;
> -
> -	/*
> -	 * Note: thread_info::preempt_count includes both thread_info::count
> -	 * and thread_info::need_resched, and is not equivalent to
> -	 * preempt_count().
> -	 */
> -	if (READ_ONCE(current_thread_info()->preempt_count) != 0)
> -		return;
> -
> -	/*
> -	 * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
> -	 * priority masking is used the GIC irqchip driver will clear DAIF.IF
> -	 * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
> -	 * DAIF we must have handled an NMI, so skip preemption.
> -	 */
> -	if (system_uses_irq_prio_masking() && read_sysreg(daif))
> -		return;
> -
> -	/*
> -	 * Preempting a task from an IRQ means we leave copies of PSTATE
> -	 * on the stack. cpufeature's enable calls may modify PSTATE, but
> -	 * resuming one of these preempted tasks would undo those changes.
> -	 *
> -	 * Only allow a task to be preempted once cpufeatures have been
> -	 * enabled.
> -	 */
> -	if (system_capabilities_finalized())
> -		preempt_schedule_irq();
> -}
> -
>  static void do_interrupt_handler(struct pt_regs *regs,
>  				 void (*handler)(struct pt_regs *))
>  {
> @@ -320,7 +93,7 @@ extern void (*handle_arch_fiq)(struct pt_regs *);
>  static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
>  				      unsigned long esr)
>  {
> -	arm64_enter_nmi(regs);
> +	irqentry_nmi_enter(regs);
>  
>  	console_verbose();
>  
> @@ -426,41 +199,43 @@ UNHANDLED(el1t, 64, error)
>  static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
>  {
>  	unsigned long far = read_sysreg(far_el1);
> +	irqentry_state_t state = irqentry_enter(regs);
>  
> -	enter_from_kernel_mode(regs);
>  	local_daif_inherit(regs);
>  	do_mem_abort(far, esr, regs);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
>  {
>  	unsigned long far = read_sysreg(far_el1);
> +	irqentry_state_t state = irqentry_enter(regs);
>  
> -	enter_from_kernel_mode(regs);
>  	local_daif_inherit(regs);
>  	do_sp_pc_abort(far, esr, regs);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_kernel_mode(regs);
> +	irqentry_state_t state = irqentry_enter(regs);
> +
>  	local_daif_inherit(regs);
>  	do_el1_undef(regs, esr);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_kernel_mode(regs);
> +	irqentry_state_t state = irqentry_enter(regs);
> +
>  	local_daif_inherit(regs);
>  	do_el1_bti(regs, esr);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
> @@ -475,11 +250,12 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
>  
>  static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_kernel_mode(regs);
> +	irqentry_state_t state = irqentry_enter(regs);
> +
>  	local_daif_inherit(regs);
>  	do_el1_fpac(regs, esr);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
> @@ -522,23 +298,22 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
>  static __always_inline void __el1_pnmi(struct pt_regs *regs,
>  				       void (*handler)(struct pt_regs *))
>  {
> -	arm64_enter_nmi(regs);
> +	irqentry_state_t state = irqentry_nmi_enter(regs);
> +
>  	do_interrupt_handler(regs, handler);
> -	arm64_exit_nmi(regs);
> +	irqentry_nmi_exit(regs, state);
>  }
>  
>  static __always_inline void __el1_irq(struct pt_regs *regs,
>  				      void (*handler)(struct pt_regs *))
>  {
> -	enter_from_kernel_mode(regs);
> +	irqentry_state_t state = irqentry_enter(regs);
>  
>  	irq_enter_rcu();
>  	do_interrupt_handler(regs, handler);
>  	irq_exit_rcu();
>  
> -	arm64_preempt_schedule_irq();
> -
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  static void noinstr el1_interrupt(struct pt_regs *regs,
>  				  void (*handler)(struct pt_regs *))
> @@ -564,21 +339,22 @@ asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
>  asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
>  {
>  	unsigned long esr = read_sysreg(esr_el1);
> +	irqentry_state_t state;
>  
>  	local_daif_restore(DAIF_ERRCTX);
> -	arm64_enter_nmi(regs);
> +	state = irqentry_nmi_enter(regs);
>  	do_serror(regs, esr);
> -	arm64_exit_nmi(regs);
> +	irqentry_nmi_exit(regs, state);
>  }
>  
>  static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
>  {
>  	unsigned long far = read_sysreg(far_el1);
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_mem_abort(far, esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
> @@ -593,50 +369,50 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
>  	if (!is_ttbr0_addr(far))
>  		arm64_apply_bp_hardening();
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_mem_abort(far, esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_fpsimd_acc(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_sve_acc(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_sme_acc(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_fpsimd_exc(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_sys(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
> @@ -646,50 +422,50 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
>  	if (!is_ttbr0_addr(instruction_pointer(regs)))
>  		arm64_apply_bp_hardening();
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_sp_pc_abort(far, esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_sp_pc_abort(regs->sp, esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_undef(regs, esr);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_bti(struct pt_regs *regs)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_bti(regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_mops(regs, esr);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	bad_el0_sync(regs, 0, esr);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
> @@ -697,28 +473,28 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
>  	/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
>  	unsigned long far = read_sysreg(far_el1);
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	do_debug_exception(far, esr, regs);
>  	local_daif_restore(DAIF_PROCCTX);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_svc(struct pt_regs *regs)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	cortex_a76_erratum_1463225_svc_handler();
>  	fp_user_discard();
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_svc(regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_fpac(regs, esr);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
> @@ -783,7 +559,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
>  static void noinstr el0_interrupt(struct pt_regs *regs,
>  				  void (*handler)(struct pt_regs *))
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  
>  	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
>  
> @@ -794,7 +570,7 @@ static void noinstr el0_interrupt(struct pt_regs *regs,
>  	do_interrupt_handler(regs, handler);
>  	irq_exit_rcu();
>  
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
> @@ -820,14 +596,15 @@ asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
>  static void noinstr __el0_error_handler_common(struct pt_regs *regs)
>  {
>  	unsigned long esr = read_sysreg(esr_el1);
> +	irqentry_state_t state_nmi;
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_ERRCTX);
> -	arm64_enter_nmi(regs);
> +	state_nmi = irqentry_nmi_enter(regs);
>  	do_serror(regs, esr);
> -	arm64_exit_nmi(regs);
> +	irqentry_nmi_exit(regs, state_nmi);
>  	local_daif_restore(DAIF_PROCCTX);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
> @@ -838,19 +615,19 @@ asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
>  #ifdef CONFIG_COMPAT
>  static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_cp15(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_svc_compat(struct pt_regs *regs)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	cortex_a76_erratum_1463225_svc_handler();
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_svc_compat(regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
> @@ -924,7 +701,7 @@ asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
>  	unsigned long esr = read_sysreg(esr_el1);
>  	unsigned long far = read_sysreg(far_el1);
>  
> -	arm64_enter_nmi(regs);
> +	irqentry_nmi_enter(regs);
>  	panic_bad_stack(regs, esr, far);
>  }
>  #endif /* CONFIG_VMAP_STACK */
> @@ -933,6 +710,7 @@ asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
>  asmlinkage noinstr unsigned long
>  __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
>  {
> +	irqentry_state_t state;
>  	unsigned long ret;
>  
>  	/*
> @@ -957,9 +735,9 @@ __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
>  	else if (cpu_has_pan())
>  		set_pstate_pan(0);
>  
> -	arm64_enter_nmi(regs);
> +	state = irqentry_nmi_enter(regs);
>  	ret = do_sdei_event(regs, arg);
> -	arm64_exit_nmi(regs);
> +	irqentry_nmi_exit(regs, state);
>  
>  	return ret;
>  }
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 60fd85d5119d..9cc0f450fa76 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -41,9 +41,6 @@
>  #include <asm/traps.h>
>  #include <asm/system_misc.h>
>  
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/syscalls.h>
> -
>  struct pt_regs_offset {
>  	const char *name;
>  	int offset;
> @@ -2179,104 +2176,6 @@ long arch_ptrace(struct task_struct *child, long request,
>  	return ptrace_request(child, request, addr, data);
>  }
>  
> -enum ptrace_syscall_dir {
> -	PTRACE_SYSCALL_ENTER = 0,
> -	PTRACE_SYSCALL_EXIT,
> -};
> -
> -static void report_syscall_enter(struct pt_regs *regs)
> -{
> -	int regno;
> -	unsigned long saved_reg;
> -
> -	/*
> -	 * We have some ABI weirdness here in the way that we handle syscall
> -	 * exit stops because we indicate whether or not the stop has been
> -	 * signalled from syscall entry or syscall exit by clobbering a general
> -	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
> -	 * and restoring its old value after the stop. This means that:
> -	 *
> -	 * - Any writes by the tracer to this register during the stop are
> -	 *   ignored/discarded.
> -	 *
> -	 * - The actual value of the register is not available during the stop,
> -	 *   so the tracer cannot save it and restore it later.
> -	 *
> -	 * - Syscall stops behave differently to seccomp and pseudo-step traps
> -	 *   (the latter do not nobble any registers).
> -	 */
> -	regno = (is_compat_task() ? 12 : 7);
> -	saved_reg = regs->regs[regno];
> -	regs->regs[regno] = PTRACE_SYSCALL_ENTER;
> -
> -	if (ptrace_report_syscall_entry(regs))
> -		forget_syscall(regs);
> -	regs->regs[regno] = saved_reg;
> -}
> -
> -static void report_syscall_exit(struct pt_regs *regs)
> -{
> -	int regno;
> -	unsigned long saved_reg;
> -
> -	/* See comment for report_syscall_enter() */
> -	regno = (is_compat_task() ? 12 : 7);
> -	saved_reg = regs->regs[regno];
> -	regs->regs[regno] = PTRACE_SYSCALL_EXIT;
> -
> -	if (!test_thread_flag(TIF_SINGLESTEP)) {
> -		ptrace_report_syscall_exit(regs, 0);
> -		regs->regs[regno] = saved_reg;
> -	} else {
> -		regs->regs[regno] = saved_reg;
> -
> -		/*
> -		 * Signal a pseudo-step exception since we are stepping but
> -		 * tracer modifications to the registers may have rewound the
> -		 * state machine.
> -		 */
> -		ptrace_report_syscall_exit(regs, 1);
> -	}
> -}
> -
> -int syscall_trace_enter(struct pt_regs *regs)
> -{
> -	unsigned long flags = read_thread_flags();
> -
> -	if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
> -		report_syscall_enter(regs);
> -		if (flags & _TIF_SYSCALL_EMU)
> -			return NO_SYSCALL;
> -	}
> -
> -	/* Do the secure computing after ptrace; failures should be fast. */
> -	if (secure_computing() == -1)
> -		return NO_SYSCALL;
> -
> -	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
> -		trace_sys_enter(regs, regs->syscallno);
> -
> -	audit_syscall_entry(regs->syscallno, regs->orig_x0, regs->regs[1],
> -			    regs->regs[2], regs->regs[3]);
> -
> -	return regs->syscallno;
> -}
> -
> -void syscall_trace_exit(struct pt_regs *regs)
> -{
> -	unsigned long flags = read_thread_flags();
> -
> -	audit_syscall_exit(regs);
> -
> -	if (flags & _TIF_SYSCALL_TRACEPOINT)
> -		trace_sys_exit(regs, syscall_get_return_value(current, regs));
> -
> -	if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
> -		report_syscall_exit(regs);
> -
> -	rseq_syscall(regs);
> -}
> -
>  /*
>   * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a.
>   * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is
> diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> index 4a77f4976e11..2982f6db6d96 100644
> --- a/arch/arm64/kernel/signal.c
> +++ b/arch/arm64/kernel/signal.c
> @@ -19,6 +19,7 @@
>  #include <linux/ratelimit.h>
>  #include <linux/rseq.h>
>  #include <linux/syscalls.h>
> +#include <linux/entry-common.h>
>  
>  #include <asm/daifflags.h>
>  #include <asm/debug-monitors.h>
> @@ -1266,7 +1267,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
>   * the kernel can handle, and then we build all the user-level signal handling
>   * stack-frames in one go after that.
>   */
> -void do_signal(struct pt_regs *regs)
> +void arch_do_signal_or_restart(struct pt_regs *regs)
>  {
>  	unsigned long continue_addr = 0, restart_addr = 0;
>  	int retval = 0;
> diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
> index ad198262b981..160ac9d15c27 100644
> --- a/arch/arm64/kernel/syscall.c
> +++ b/arch/arm64/kernel/syscall.c
> @@ -7,6 +7,7 @@
>  #include <linux/ptrace.h>
>  #include <linux/randomize_kstack.h>
>  #include <linux/syscalls.h>
> +#include <linux/entry-common.h>
>  
>  #include <asm/debug-monitors.h>
>  #include <asm/exception.h>
> @@ -66,14 +67,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
>  	choose_random_kstack_offset(get_random_u16() & 0x1FF);
>  }
>  
> -static inline bool has_syscall_work(unsigned long flags)
> +static inline bool has_syscall_work(unsigned long work)
>  {
> -	return unlikely(flags & _TIF_SYSCALL_WORK);
> +	return unlikely(work & SYSCALL_WORK_ENTER);
>  }
>  
>  static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>  			   const syscall_fn_t syscall_table[])
>  {
> +	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
>  	unsigned long flags = read_thread_flags();
>  
>  	regs->orig_x0 = regs->regs[0];
> @@ -107,7 +109,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>  		return;
>  	}
>  
> -	if (has_syscall_work(flags)) {
> +	if (has_syscall_work(work)) {
>  		/*
>  		 * The de-facto standard way to skip a system call using ptrace
>  		 * is to set the system call to -1 (NO_SYSCALL) and set x0 to a
> @@ -125,7 +127,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>  		 */
>  		if (scno == NO_SYSCALL)
>  			syscall_set_return_value(current, regs, -ENOSYS, 0);
> -		scno = syscall_trace_enter(regs);
> +		scno = syscall_trace_enter(regs, regs->syscallno, work);
>  		if (scno == NO_SYSCALL)
>  			goto trace_exit;
>  	}
> @@ -137,14 +139,14 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>  	 * check again. However, if we were tracing entry, then we always trace
>  	 * exit regardless, as the old entry assembly did.
>  	 */
> -	if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> -		flags = read_thread_flags();
> -		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
> +	if (!has_syscall_work(work) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> +		work = READ_ONCE(current_thread_info()->syscall_work);
> +		if (!has_syscall_work(work) && !report_single_step(work))
>  			return;
>  	}
>  
>  trace_exit:
> -	syscall_trace_exit(regs);
> +	syscall_exit_work(regs, work);
>  }
>  
>  void do_el0_svc(struct pt_regs *regs)
Kevin Brodsky Aug. 20, 2024, 11:43 a.m. UTC | #2
On 29/06/2024 10:56, Jinjie Ruan wrote:
> Currently, x86, RISC-V and LoongArch use the generic entry code. Convert
> arm64 to use the generic entry infrastructure from kernel/entry/*. The
> generic entry code makes maintainers' work easier and the code more
> elegant, and it also removes 150+ lines of duplicated code. The changes
> are as follows:
>
>  - Remove the TIF_SYSCALL_* flags, _TIF_WORK_MASK and _TIF_SYSCALL_WORK.
>
>  - Remove syscall_trace_enter/exit() and use the generic ones.
>
>  - Remove *enter_from/exit_to_kernel_mode(), and wrap with generic
>    irqentry_enter/exit().
>
>  - Remove *enter_from/exit_to_user_mode(), and wrap with generic
>    irqentry_enter_from/exit_to_user_mode().
>
>  - Remove arm64_enter/exit_nmi() and use generic irqentry_nmi_enter/exit().
>
>  - Remove the PREEMPT_DYNAMIC code, as the generic entry code handles it
>    once arch_irqentry_exit_need_resched() is implemented.

This is looking good to me overall; moving to the generic helpers
is a clear improvement. I've tried my best to check that the new
implementation is functionally equivalent to the old. Ignoring additions
in the generic code (such as additional instrumentation_{begin,end}()
pairs or assertions), this seems to be the case, with one exception. The
MTE operations are done in a slightly different order on entry:

* mte_disable_tco_entry() was called *after* the generic lockdep/CT
functions in __enter_from_user_mode(). It is now called before those
generic operations, as arch_enter_from_user_mode() is called at the
beginning of enter_from_user_mode().

* Similarly, mte_disable_tco_entry() and mte_check_tfsr_entry() were
called after the generic operations in enter_from_kernel_mode(), and
they are now called before them, as arch_enter_from_kernel_mode() is
called at the beginning of irqentry_enter().

I am not under the impression that these ordering changes are
problematic, but I may be missing something.
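
For illustration, the before/after ordering on the user entry path looks
roughly like this (simplified sketch: the "before" half is the code removed
by this patch, the "after" half paraphrases the generic
enter_from_user_mode() with the new arch hook, so the exact generic
sequence may differ):

  /* Before: MTE handled after the generic lockdep/CT operations */
  static __always_inline void __enter_from_user_mode(void)
  {
          lockdep_hardirqs_off(CALLER_ADDR0);
          CT_WARN_ON(ct_state() != CONTEXT_USER);
          user_exit_irqoff();
          trace_hardirqs_off_finish();
          mte_disable_tco_entry(current);         /* MTE last */
  }

  /* After: the arch hook (and thus MTE) runs first */
  static __always_inline void enter_from_user_mode(struct pt_regs *regs)
  {
          arch_enter_from_user_mode(regs);        /* MTE first */
          lockdep_hardirqs_off(CALLER_ADDR0);
          CT_WARN_ON(ct_state() != CONTEXT_USER);
          user_exit_irqoff();
          trace_hardirqs_off_finish();
  }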

> [...]
>  
> -/*
> - * Handle IRQ/context state management when entering from kernel mode.
> - * Before this function is called it is not safe to call regular kernel code,
> - * instrumentable code, or any code which may trigger an exception.
> - *
> - * This is intended to match the logic in irqentry_enter(), handling the kernel
> - * mode transitions only.
> - */
> -static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
> -{
> -	regs->exit_rcu = false;

exit_rcu in struct pt_regs is unused now that these functions are gone
so it can be removed.
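
I.e. something like this (sketch against arch/arm64/include/asm/ptrace.h,
assuming no other users remain after this series):

  --- a/arch/arm64/include/asm/ptrace.h
  +++ b/arch/arm64/include/asm/ptrace.h
  @@ ... @@ struct pt_regs
   	/* Only valid for some EL1 exceptions. */
   	u64 lockdep_hardirqs;
  -	u64 exit_rcu;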

> [...]
>  
> @@ -259,48 +74,6 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)

arm64_{enter,exit}_el1_dbg() apparently have no generic counterparts we
can replace them with, but maybe we could align them with the generic
functions some more? Specifically, I'm thinking about making them
return/take an irqentry_state_t just like irqentry_nmi_{enter,exit}().
This way we can get rid of struct pt_regs::lockdep_hardirqs, which is
now only used by those functions.
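
Something along these lines (untested sketch, mirroring
irqentry_nmi_{enter,exit}() and reusing the irqentry_state_t::lockdep
member):

  static irqentry_state_t noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
  {
          irqentry_state_t state;

          state.lockdep = lockdep_hardirqs_enabled();

          lockdep_hardirqs_off(CALLER_ADDR0);
          ct_nmi_enter();

          trace_hardirqs_off_finish();

          return state;
  }

  static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs,
                                         irqentry_state_t state)
  {
          if (state.lockdep) {
                  trace_hardirqs_on_prepare();
                  lockdep_hardirqs_on_prepare();
          }

          ct_nmi_exit();
          if (state.lockdep)
                  lockdep_hardirqs_on(CALLER_ADDR0);
  }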

>  		lockdep_hardirqs_on(CALLER_ADDR0);
>  }
>  
> -#ifdef CONFIG_PREEMPT_DYNAMIC
> -DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);

The key is declared in asm/preempt.h; that declaration should also be
removed.
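
I.e. (sketch, assuming the generic entry code provides the only remaining
definition/declaration):

  --- a/arch/arm64/include/asm/preempt.h
  +++ b/arch/arm64/include/asm/preempt.h
  @@ ... @@
  -DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);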

Kevin

> [...]
Jinjie Ruan Aug. 22, 2024, 12:30 p.m. UTC | #3
On 2024/8/20 19:43, Kevin Brodsky wrote:
> On 29/06/2024 10:56, Jinjie Ruan wrote:
>> Currently, x86, RISC-V and LoongArch use the generic entry code. Convert
>> arm64 to use the generic entry infrastructure from kernel/entry/*. The
>> generic entry code makes maintainers' work easier and the code more
>> elegant, and it also removes 150+ lines of duplicated code. The changes
>> are as follows:
>>
>>  - Remove the TIF_SYSCALL_* flags, _TIF_WORK_MASK and _TIF_SYSCALL_WORK.
>>
>>  - Remove syscall_trace_enter/exit() and use the generic ones.
>>
>>  - Remove *enter_from/exit_to_kernel_mode(), and wrap with generic
>>    irqentry_enter/exit().
>>
>>  - Remove *enter_from/exit_to_user_mode(), and wrap with generic
>>    irqentry_enter_from/exit_to_user_mode().
>>
>>  - Remove arm64_enter/exit_nmi() and use generic irqentry_nmi_enter/exit().
>>
>>  - Remove the PREEMPT_DYNAMIC code, as the generic entry code handles it
>>    once arch_irqentry_exit_need_resched() is implemented.
> 
> This is looking good to me overall; moving to the generic helpers
> is a clear improvement. I've tried my best to check that the new

Thank you very much for the review. It does make the code clearer.

> implementation is functionally equivalent to the old. Ignoring additions
> in the generic code (such as additional instrumentation_{begin,end}()
> pairs or assertions), this seems to be the case, with one exception. The
> MTE operations are done in a slightly different order on entry:
> 
> * mte_disable_tco_entry() was called *after* the generic lockdep/CT
> functions in __enter_from_user_mode(). It is now called before those
> generic operations, as arch_enter_from_user_mode() is called at the
> beginning of enter_from_user_mode().

The most arm64-specific parts are these MTE operations. The comments for
__enter_from_kernel_mode() and __enter_from_user_mode() say:

   "Handle IRQ/context state management when entering from user/kernel
mode. Before this function is called it is not safe to call regular
kernel code."

I guess that is the reason why mte_disable_tco_entry() was called *after*
the generic lockdep/CT functions, but the first version of commit
38ddf7dafaeaf ("arm64: mte: avoid clearing PSTATE.TCO on entry unless
necessary") called it in arch/arm64/kernel/entry.S, so I think the order
is not that important.

> 
> * Similarly, mte_disable_tco_entry() and mte_check_tfsr_entry() were
> called after the generic operations in enter_from_kernel_mode(), and
> they are now called before them, as arch_enter_from_kernel_mode() is
> called at the beginning of irqentry_enter().

This can be adjusted to be consistent.

> 
> I am not under the impression that these ordering changes are
> problematic, but I may be missing something.
>> [...]
>>  
>> -/*
>> - * Handle IRQ/context state management when entering from kernel mode.
>> - * Before this function is called it is not safe to call regular kernel code,
>> - * instrumentable code, or any code which may trigger an exception.
>> - *
>> - * This is intended to match the logic in irqentry_enter(), handling the kernel
>> - * mode transitions only.
>> - */
>> -static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
>> -{
>> -	regs->exit_rcu = false;
> 
> exit_rcu in struct pt_regs is unused now that these functions are gone
> so it can be removed.

Right, it is unused now and can be removed, which is also consistent for arm64.

> 
>> [...]
>>  
>> @@ -259,48 +74,6 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
> 
> arm64_{enter,exit}_el1_dbg() apparently have no generic counterparts we
> can replace them with, but maybe we could align them with the generic
> functions some more? Specifically, I'm thinking about making them
> return/take an irqentry_state_t just like irqentry_nmi_{enter,exit}().
> This way we can get rid of struct pt_regs::lockdep_hardirqs, which is
> now only used by those functions.
> 
>>  		lockdep_hardirqs_on(CALLER_ADDR0);
>>  }
>>  
>> -#ifdef CONFIG_PREEMPT_DYNAMIC
>> -DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
> 
> The key is declared in asm/preempt.h; that declaration should also be
> removed.

Yes, you are right! commit 1b2d3451ee50 ("arm64: Support
PREEMPT_DYNAMIC") mentioned it:

    "Since arm64 does not yet use the generic entry code, we must define
our own `sk_dynamic_irqentry_exit_cond_resched`, which will be
enabled/disabled by the common code in kernel/sched/core.c. All other
preemption functions and associated static keys are defined there."

So once arm64 switches to the generic entry code, that declaration can be removed as well.

> 
> Kevin
> 
>> [...]
> 
>
Kevin Brodsky Aug. 26, 2024, 3:56 p.m. UTC | #4
On 22/08/2024 14:30, Jinjie Ruan wrote:
> On 2024/8/20 19:43, Kevin Brodsky wrote:
>> On 29/06/2024 10:56, Jinjie Ruan wrote:
>>> Currently, x86, RISC-V and LoongArch use the generic entry code. Convert
>>> arm64 to use the generic entry infrastructure from kernel/entry/*. The
>>> generic entry code makes maintainers' work easier and the code more
>>> elegant, and it also removes 150+ lines of duplicated code. The changes
>>> are as follows:
>>>
>>>  - Remove the TIF_SYSCALL_* flags, _TIF_WORK_MASK and _TIF_SYSCALL_WORK.
>>>
>>>  - Remove syscall_trace_enter/exit() and use the generic ones.
>>>
>>>  - Remove *enter_from/exit_to_kernel_mode(), and wrap with generic
>>>    irqentry_enter/exit().
>>>
>>>  - Remove *enter_from/exit_to_user_mode(), and wrap with generic
>>>    irqentry_enter_from/exit_to_user_mode().
>>>
>>>  - Remove arm64_enter/exit_nmi() and use generic irqentry_nmi_enter/exit().
>>>
>>>  - Remove the PREEMPT_DYNAMIC code, as the generic entry code handles it
>>>    once arch_irqentry_exit_need_resched() is implemented.
>> This is looking good to me overall; moving to the generic helpers
>> is a clear improvement. I've tried my best to check that the new
> Thank you very much for the review. It does make the code clearer.
>
>> implementation is functionally equivalent to the old. Ignoring additions
>> in the generic code (such as additional instrumentation_{begin,end}()
>> pairs or assertions), this seems to be the case, with one exception. The
>> MTE operations are done in a slightly different order on entry:
>>
>> * mte_disable_tco_entry() was called *after* the generic lockdep/CT
>> functions in __enter_from_user_mode(). It is now called before those
>> generic operations, as arch_enter_from_user_mode() is called at the
>> beginning of enter_from_user_mode().
> The most arm64-specific parts are these MTE operations. The comments for
> __enter_from_kernel_mode() and __enter_from_user_mode() say:
>
>    "Handle IRQ/context state management when entering from user/kernel
> mode. Before this function is called it is not safe to call regular
> kernel code."
>
> I guess that is the reason why mte_disable_tco_entry() was called *after*
> the generic lockdep/CT functions, but the first version of commit
> 38ddf7dafaeaf ("arm64: mte: avoid clearing PSTATE.TCO on entry unless
> necessary") called it in arch/arm64/kernel/entry.S, so I think the order
> is not that important.

Good point. Before that commit, TCO was disabled before any generic
exception code was run, so changing the ordering of
mte_disable_tco_entry() is unlikely to be a problem (the current
implementation doesn't do anything complex).
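
For reference, the current implementation is roughly the following (quoted
from memory and simplified from arch/arm64/include/asm/mte.h; the exact
condition may differ):

  static inline void mte_disable_tco_entry(struct task_struct *task)
  {
          if (!system_supports_mte())
                  return;

          /*
           * TCO is set on exception entry; only clear it (re-enabling
           * tag checks) when a tag check fault could actually be
           * reported, i.e. MTE enabled in sync/asymm mode in the kernel
           * (KASAN_HW_TAGS) or in the userspace task.
           */
          if (kasan_hw_tags_enabled() ||
              (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT)))
                  asm volatile(SET_PSTATE_TCO(0));
  }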

>
>> * Similarly, mte_disable_tco_entry() and mte_check_tfsr_entry() were
>> called after the generic operations in enter_from_kernel_mode(), and
>> they are now called before them, as arch_enter_from_kernel_mode() is
>> called at the beginning of irqentry_enter().
> This can be adjusted to be consistent.

I wonder if moving mte_check_tfsr_entry() is as harmless as moving
mte_disable_tco_entry(), because the former can cause a stack dump. Is
that safe to do before the lockdep/RCU handling? I would guess so, but
it would be good for someone to confirm.

Kevin
Jinjie Ruan Sept. 18, 2024, 1:59 a.m. UTC | #5
Hi Catalin, Will and Mark, do you have any suggestions or ideas?

On 2024/6/29 16:56, Jinjie Ruan wrote:
> [...]
> +						 unsigned long saved_reg,
> +						 unsigned long work)
> +{
> +	int regno = (is_compat_task() ? 12 : 7);
> +
> +	if (!report_single_step(work))
> +		regs->regs[regno] = saved_reg;
> +}
> +
> +#define arch_post_report_syscall_exit arch_post_report_syscall_exit
> +
> +#endif /* _ASM_ARM64_ENTRY_COMMON_H */
> diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
> index ab8e14b96f68..9891b15da4c3 100644
> --- a/arch/arm64/include/asm/syscall.h
> +++ b/arch/arm64/include/asm/syscall.h
> @@ -85,7 +85,9 @@ static inline int syscall_get_arch(struct task_struct *task)
>  	return AUDIT_ARCH_AARCH64;
>  }
>  
> -int syscall_trace_enter(struct pt_regs *regs);
> -void syscall_trace_exit(struct pt_regs *regs);
> +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
> +{
> +	return false;
> +}
>  
>  #endif	/* __ASM_SYSCALL_H */
> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
> index e72a3bf9e563..ec5d74c53bf9 100644
> --- a/arch/arm64/include/asm/thread_info.h
> +++ b/arch/arm64/include/asm/thread_info.h
> @@ -43,6 +43,7 @@ struct thread_info {
>  	void			*scs_sp;
>  #endif
>  	u32			cpu;
> +	unsigned long		syscall_work;   /* SYSCALL_WORK_ flags */
>  };
>  
>  #define thread_saved_pc(tsk)	\
> @@ -64,11 +65,6 @@ void arch_setup_new_exec(void);
>  #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
>  #define TIF_MTE_ASYNC_FAULT	5	/* MTE Asynchronous Tag Check Fault */
>  #define TIF_NOTIFY_SIGNAL	6	/* signal notifications exist */
> -#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
> -#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
> -#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
> -#define TIF_SECCOMP		11	/* syscall secure computing */
> -#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
>  #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
>  #define TIF_FREEZE		19
>  #define TIF_RESTORE_SIGMASK	20
> @@ -86,27 +82,12 @@ void arch_setup_new_exec(void);
>  #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
>  #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
>  #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
> -#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
> -#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
> -#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
> -#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
> -#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
> -#define _TIF_UPROBE		(1 << TIF_UPROBE)
> -#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
> +#define _TIF_UPROBE            (1 << TIF_UPROBE)
>  #define _TIF_32BIT		(1 << TIF_32BIT)
>  #define _TIF_SVE		(1 << TIF_SVE)
>  #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
>  #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
>  
> -#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
> -				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
> -				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
> -				 _TIF_NOTIFY_SIGNAL)
> -
> -#define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
> -				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
> -				 _TIF_SYSCALL_EMU)
> -
>  #ifdef CONFIG_SHADOW_CALL_STACK
>  #define INIT_SCS							\
>  	.scs_base	= init_shadow_call_stack,			\
> diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
> index b77a15955f28..ac61aeb93933 100644
> --- a/arch/arm64/kernel/entry-common.c
> +++ b/arch/arm64/kernel/entry-common.c
> @@ -14,6 +14,7 @@
>  #include <linux/sched.h>
>  #include <linux/sched/debug.h>
>  #include <linux/thread_info.h>
> +#include <linux/entry-common.h>
>  
>  #include <asm/cpufeature.h>
>  #include <asm/daifflags.h>
> @@ -28,201 +29,15 @@
>  #include <asm/sysreg.h>
>  #include <asm/system_misc.h>
>  
> -/*
> - * Handle IRQ/context state management when entering from kernel mode.
> - * Before this function is called it is not safe to call regular kernel code,
> - * instrumentable code, or any code which may trigger an exception.
> - *
> - * This is intended to match the logic in irqentry_enter(), handling the kernel
> - * mode transitions only.
> - */
> -static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
> -{
> -	regs->exit_rcu = false;
> -
> -	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
> -		lockdep_hardirqs_off(CALLER_ADDR0);
> -		ct_irq_enter();
> -		trace_hardirqs_off_finish();
> -
> -		regs->exit_rcu = true;
> -		return;
> -	}
> -
> -	lockdep_hardirqs_off(CALLER_ADDR0);
> -	rcu_irq_enter_check_tick();
> -	trace_hardirqs_off_finish();
> -}
> -
> -static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
> -{
> -	__enter_from_kernel_mode(regs);
> -	mte_check_tfsr_entry();
> -	mte_disable_tco_entry(current);
> -}
> -
> -/*
> - * Handle IRQ/context state management when exiting to kernel mode.
> - * After this function returns it is not safe to call regular kernel code,
> - * instrumentable code, or any code which may trigger an exception.
> - *
> - * This is intended to match the logic in irqentry_exit(), handling the kernel
> - * mode transitions only, and with preemption handled elsewhere.
> - */
> -static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
> -{
> -	lockdep_assert_irqs_disabled();
> -
> -	if (interrupts_enabled(regs)) {
> -		if (regs->exit_rcu) {
> -			trace_hardirqs_on_prepare();
> -			lockdep_hardirqs_on_prepare();
> -			ct_irq_exit();
> -			lockdep_hardirqs_on(CALLER_ADDR0);
> -			return;
> -		}
> -
> -		trace_hardirqs_on();
> -	} else {
> -		if (regs->exit_rcu)
> -			ct_irq_exit();
> -	}
> -}
> -
> -static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
> -{
> -	mte_check_tfsr_exit();
> -	__exit_to_kernel_mode(regs);
> -}
> -
> -/*
> - * Handle IRQ/context state management when entering from user mode.
> - * Before this function is called it is not safe to call regular kernel code,
> - * instrumentable code, or any code which may trigger an exception.
> - */
> -static __always_inline void __enter_from_user_mode(void)
> +static __always_inline void exit_to_user_mode_wrapper(struct pt_regs *regs)
>  {
> -	lockdep_hardirqs_off(CALLER_ADDR0);
> -	CT_WARN_ON(ct_state() != CONTEXT_USER);
> -	user_exit_irqoff();
> -	trace_hardirqs_off_finish();
> -	mte_disable_tco_entry(current);
> -}
> -
> -static __always_inline void enter_from_user_mode(struct pt_regs *regs)
> -{
> -	__enter_from_user_mode();
> -}
> -
> -/*
> - * Handle IRQ/context state management when exiting to user mode.
> - * After this function returns it is not safe to call regular kernel code,
> - * instrumentable code, or any code which may trigger an exception.
> - */
> -static __always_inline void __exit_to_user_mode(void)
> -{
> -	trace_hardirqs_on_prepare();
> -	lockdep_hardirqs_on_prepare();
> -	user_enter_irqoff();
> -	lockdep_hardirqs_on(CALLER_ADDR0);
> -}
> -
> -static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
> -{
> -	do {
> -		local_irq_enable();
> -
> -		if (thread_flags & _TIF_NEED_RESCHED)
> -			schedule();
> -
> -		if (thread_flags & _TIF_UPROBE)
> -			uprobe_notify_resume(regs);
> -
> -		if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
> -			clear_thread_flag(TIF_MTE_ASYNC_FAULT);
> -			send_sig_fault(SIGSEGV, SEGV_MTEAERR,
> -				       (void __user *)NULL, current);
> -		}
> -
> -		if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
> -			do_signal(regs);
> -
> -		if (thread_flags & _TIF_NOTIFY_RESUME)
> -			resume_user_mode_work(regs);
> -
> -		if (thread_flags & _TIF_FOREIGN_FPSTATE)
> -			fpsimd_restore_current_state();
> -
> -		local_irq_disable();
> -		thread_flags = read_thread_flags();
> -	} while (thread_flags & _TIF_WORK_MASK);
> -}
> -
> -static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
> -{
> -	unsigned long flags;
> -
>  	local_irq_disable();
> -
> -	flags = read_thread_flags();
> -	if (unlikely(flags & _TIF_WORK_MASK))
> -		do_notify_resume(regs, flags);
> -
> -	local_daif_mask();
> -
> -	lockdep_sys_exit();
> -}
> -
> -static __always_inline void exit_to_user_mode(struct pt_regs *regs)
> -{
> -	exit_to_user_mode_prepare(regs);
> -	mte_check_tfsr_exit();
> -	__exit_to_user_mode();
> +	irqentry_exit_to_user_mode(regs);
>  }
>  
>  asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
>  {
> -	exit_to_user_mode(regs);
> -}
> -
> -/*
> - * Handle IRQ/context state management when entering an NMI from user/kernel
> - * mode. Before this function is called it is not safe to call regular kernel
> - * code, instrumentable code, or any code which may trigger an exception.
> - */
> -static void noinstr arm64_enter_nmi(struct pt_regs *regs)
> -{
> -	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
> -
> -	__nmi_enter();
> -	lockdep_hardirqs_off(CALLER_ADDR0);
> -	lockdep_hardirq_enter();
> -	ct_nmi_enter();
> -
> -	trace_hardirqs_off_finish();
> -	ftrace_nmi_enter();
> -}
> -
> -/*
> - * Handle IRQ/context state management when exiting an NMI from user/kernel
> - * mode. After this function returns it is not safe to call regular kernel
> - * code, instrumentable code, or any code which may trigger an exception.
> - */
> -static void noinstr arm64_exit_nmi(struct pt_regs *regs)
> -{
> -	bool restore = regs->lockdep_hardirqs;
> -
> -	ftrace_nmi_exit();
> -	if (restore) {
> -		trace_hardirqs_on_prepare();
> -		lockdep_hardirqs_on_prepare();
> -	}
> -
> -	ct_nmi_exit();
> -	lockdep_hardirq_exit();
> -	if (restore)
> -		lockdep_hardirqs_on(CALLER_ADDR0);
> -	__nmi_exit();
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  /*
> @@ -259,48 +74,6 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
>  		lockdep_hardirqs_on(CALLER_ADDR0);
>  }
>  
> -#ifdef CONFIG_PREEMPT_DYNAMIC
> -DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
> -#define need_irq_preemption() \
> -	(static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
> -#else
> -#define need_irq_preemption()	(IS_ENABLED(CONFIG_PREEMPTION))
> -#endif
> -
> -static void __sched arm64_preempt_schedule_irq(void)
> -{
> -	if (!need_irq_preemption())
> -		return;
> -
> -	/*
> -	 * Note: thread_info::preempt_count includes both thread_info::count
> -	 * and thread_info::need_resched, and is not equivalent to
> -	 * preempt_count().
> -	 */
> -	if (READ_ONCE(current_thread_info()->preempt_count) != 0)
> -		return;
> -
> -	/*
> -	 * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
> -	 * priority masking is used the GIC irqchip driver will clear DAIF.IF
> -	 * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
> -	 * DAIF we must have handled an NMI, so skip preemption.
> -	 */
> -	if (system_uses_irq_prio_masking() && read_sysreg(daif))
> -		return;
> -
> -	/*
> -	 * Preempting a task from an IRQ means we leave copies of PSTATE
> -	 * on the stack. cpufeature's enable calls may modify PSTATE, but
> -	 * resuming one of these preempted tasks would undo those changes.
> -	 *
> -	 * Only allow a task to be preempted once cpufeatures have been
> -	 * enabled.
> -	 */
> -	if (system_capabilities_finalized())
> -		preempt_schedule_irq();
> -}
> -
>  static void do_interrupt_handler(struct pt_regs *regs,
>  				 void (*handler)(struct pt_regs *))
>  {
> @@ -320,7 +93,7 @@ extern void (*handle_arch_fiq)(struct pt_regs *);
>  static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
>  				      unsigned long esr)
>  {
> -	arm64_enter_nmi(regs);
> +	irqentry_nmi_enter(regs);
>  
>  	console_verbose();
>  
> @@ -426,41 +199,43 @@ UNHANDLED(el1t, 64, error)
>  static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
>  {
>  	unsigned long far = read_sysreg(far_el1);
> +	irqentry_state_t state = irqentry_enter(regs);
>  
> -	enter_from_kernel_mode(regs);
>  	local_daif_inherit(regs);
>  	do_mem_abort(far, esr, regs);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
>  {
>  	unsigned long far = read_sysreg(far_el1);
> +	irqentry_state_t state = irqentry_enter(regs);
>  
> -	enter_from_kernel_mode(regs);
>  	local_daif_inherit(regs);
>  	do_sp_pc_abort(far, esr, regs);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_kernel_mode(regs);
> +	irqentry_state_t state = irqentry_enter(regs);
> +
>  	local_daif_inherit(regs);
>  	do_el1_undef(regs, esr);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_kernel_mode(regs);
> +	irqentry_state_t state = irqentry_enter(regs);
> +
>  	local_daif_inherit(regs);
>  	do_el1_bti(regs, esr);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
> @@ -475,11 +250,12 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
>  
>  static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_kernel_mode(regs);
> +	irqentry_state_t state = irqentry_enter(regs);
> +
>  	local_daif_inherit(regs);
>  	do_el1_fpac(regs, esr);
>  	local_daif_mask();
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  
>  asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
> @@ -522,23 +298,22 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
>  static __always_inline void __el1_pnmi(struct pt_regs *regs,
>  				       void (*handler)(struct pt_regs *))
>  {
> -	arm64_enter_nmi(regs);
> +	irqentry_state_t state = irqentry_nmi_enter(regs);
> +
>  	do_interrupt_handler(regs, handler);
> -	arm64_exit_nmi(regs);
> +	irqentry_nmi_exit(regs, state);
>  }
>  
>  static __always_inline void __el1_irq(struct pt_regs *regs,
>  				      void (*handler)(struct pt_regs *))
>  {
> -	enter_from_kernel_mode(regs);
> +	irqentry_state_t state = irqentry_enter(regs);
>  
>  	irq_enter_rcu();
>  	do_interrupt_handler(regs, handler);
>  	irq_exit_rcu();
>  
> -	arm64_preempt_schedule_irq();
> -
> -	exit_to_kernel_mode(regs);
> +	irqentry_exit(regs, state);
>  }
>  static void noinstr el1_interrupt(struct pt_regs *regs,
>  				  void (*handler)(struct pt_regs *))
> @@ -564,21 +339,22 @@ asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
>  asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
>  {
>  	unsigned long esr = read_sysreg(esr_el1);
> +	irqentry_state_t state;
>  
>  	local_daif_restore(DAIF_ERRCTX);
> -	arm64_enter_nmi(regs);
> +	state = irqentry_nmi_enter(regs);
>  	do_serror(regs, esr);
> -	arm64_exit_nmi(regs);
> +	irqentry_nmi_exit(regs, state);
>  }
>  
>  static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
>  {
>  	unsigned long far = read_sysreg(far_el1);
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_mem_abort(far, esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
> @@ -593,50 +369,50 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
>  	if (!is_ttbr0_addr(far))
>  		arm64_apply_bp_hardening();
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_mem_abort(far, esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_fpsimd_acc(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_sve_acc(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_sme_acc(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_fpsimd_exc(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_sys(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
> @@ -646,50 +422,50 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
>  	if (!is_ttbr0_addr(instruction_pointer(regs)))
>  		arm64_apply_bp_hardening();
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_sp_pc_abort(far, esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_sp_pc_abort(regs->sp, esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_undef(regs, esr);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_bti(struct pt_regs *regs)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_bti(regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_mops(regs, esr);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	bad_el0_sync(regs, 0, esr);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
> @@ -697,28 +473,28 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
>  	/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
>  	unsigned long far = read_sysreg(far_el1);
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	do_debug_exception(far, esr, regs);
>  	local_daif_restore(DAIF_PROCCTX);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_svc(struct pt_regs *regs)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	cortex_a76_erratum_1463225_svc_handler();
>  	fp_user_discard();
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_svc(regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_fpac(regs, esr);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
> @@ -783,7 +559,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
>  static void noinstr el0_interrupt(struct pt_regs *regs,
>  				  void (*handler)(struct pt_regs *))
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  
>  	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
>  
> @@ -794,7 +570,7 @@ static void noinstr el0_interrupt(struct pt_regs *regs,
>  	do_interrupt_handler(regs, handler);
>  	irq_exit_rcu();
>  
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
> @@ -820,14 +596,15 @@ asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
>  static void noinstr __el0_error_handler_common(struct pt_regs *regs)
>  {
>  	unsigned long esr = read_sysreg(esr_el1);
> +	irqentry_state_t state_nmi;
>  
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_ERRCTX);
> -	arm64_enter_nmi(regs);
> +	state_nmi = irqentry_nmi_enter(regs);
>  	do_serror(regs, esr);
> -	arm64_exit_nmi(regs);
> +	irqentry_nmi_exit(regs, state_nmi);
>  	local_daif_restore(DAIF_PROCCTX);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
> @@ -838,19 +615,19 @@ asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
>  #ifdef CONFIG_COMPAT
>  static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_cp15(esr, regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  static void noinstr el0_svc_compat(struct pt_regs *regs)
>  {
> -	enter_from_user_mode(regs);
> +	irqentry_enter_from_user_mode(regs);
>  	cortex_a76_erratum_1463225_svc_handler();
>  	local_daif_restore(DAIF_PROCCTX);
>  	do_el0_svc_compat(regs);
> -	exit_to_user_mode(regs);
> +	exit_to_user_mode_wrapper(regs);
>  }
>  
>  asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
> @@ -924,7 +701,7 @@ asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
>  	unsigned long esr = read_sysreg(esr_el1);
>  	unsigned long far = read_sysreg(far_el1);
>  
> -	arm64_enter_nmi(regs);
> +	irqentry_nmi_enter(regs);
>  	panic_bad_stack(regs, esr, far);
>  }
>  #endif /* CONFIG_VMAP_STACK */
> @@ -933,6 +710,7 @@ asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
>  asmlinkage noinstr unsigned long
>  __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
>  {
> +	irqentry_state_t state;
>  	unsigned long ret;
>  
>  	/*
> @@ -957,9 +735,9 @@ __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
>  	else if (cpu_has_pan())
>  		set_pstate_pan(0);
>  
> -	arm64_enter_nmi(regs);
> +	state = irqentry_nmi_enter(regs);
>  	ret = do_sdei_event(regs, arg);
> -	arm64_exit_nmi(regs);
> +	irqentry_nmi_exit(regs, state);
>  
>  	return ret;
>  }
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 60fd85d5119d..9cc0f450fa76 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -41,9 +41,6 @@
>  #include <asm/traps.h>
>  #include <asm/system_misc.h>
>  
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/syscalls.h>
> -
>  struct pt_regs_offset {
>  	const char *name;
>  	int offset;
> @@ -2179,104 +2176,6 @@ long arch_ptrace(struct task_struct *child, long request,
>  	return ptrace_request(child, request, addr, data);
>  }
>  
> -enum ptrace_syscall_dir {
> -	PTRACE_SYSCALL_ENTER = 0,
> -	PTRACE_SYSCALL_EXIT,
> -};
> -
> -static void report_syscall_enter(struct pt_regs *regs)
> -{
> -	int regno;
> -	unsigned long saved_reg;
> -
> -	/*
> -	 * We have some ABI weirdness here in the way that we handle syscall
> -	 * exit stops because we indicate whether or not the stop has been
> -	 * signalled from syscall entry or syscall exit by clobbering a general
> -	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
> -	 * and restoring its old value after the stop. This means that:
> -	 *
> -	 * - Any writes by the tracer to this register during the stop are
> -	 *   ignored/discarded.
> -	 *
> -	 * - The actual value of the register is not available during the stop,
> -	 *   so the tracer cannot save it and restore it later.
> -	 *
> -	 * - Syscall stops behave differently to seccomp and pseudo-step traps
> -	 *   (the latter do not nobble any registers).
> -	 */
> -	regno = (is_compat_task() ? 12 : 7);
> -	saved_reg = regs->regs[regno];
> -	regs->regs[regno] = PTRACE_SYSCALL_ENTER;
> -
> -	if (ptrace_report_syscall_entry(regs))
> -		forget_syscall(regs);
> -	regs->regs[regno] = saved_reg;
> -}
> -
> -static void report_syscall_exit(struct pt_regs *regs)
> -{
> -	int regno;
> -	unsigned long saved_reg;
> -
> -	/* See comment for report_syscall_enter() */
> -	regno = (is_compat_task() ? 12 : 7);
> -	saved_reg = regs->regs[regno];
> -	regs->regs[regno] = PTRACE_SYSCALL_EXIT;
> -
> -	if (!test_thread_flag(TIF_SINGLESTEP)) {
> -		ptrace_report_syscall_exit(regs, 0);
> -		regs->regs[regno] = saved_reg;
> -	} else {
> -		regs->regs[regno] = saved_reg;
> -
> -		/*
> -		 * Signal a pseudo-step exception since we are stepping but
> -		 * tracer modifications to the registers may have rewound the
> -		 * state machine.
> -		 */
> -		ptrace_report_syscall_exit(regs, 1);
> -	}
> -}
> -
> -int syscall_trace_enter(struct pt_regs *regs)
> -{
> -	unsigned long flags = read_thread_flags();
> -
> -	if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
> -		report_syscall_enter(regs);
> -		if (flags & _TIF_SYSCALL_EMU)
> -			return NO_SYSCALL;
> -	}
> -
> -	/* Do the secure computing after ptrace; failures should be fast. */
> -	if (secure_computing() == -1)
> -		return NO_SYSCALL;
> -
> -	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
> -		trace_sys_enter(regs, regs->syscallno);
> -
> -	audit_syscall_entry(regs->syscallno, regs->orig_x0, regs->regs[1],
> -			    regs->regs[2], regs->regs[3]);
> -
> -	return regs->syscallno;
> -}
> -
> -void syscall_trace_exit(struct pt_regs *regs)
> -{
> -	unsigned long flags = read_thread_flags();
> -
> -	audit_syscall_exit(regs);
> -
> -	if (flags & _TIF_SYSCALL_TRACEPOINT)
> -		trace_sys_exit(regs, syscall_get_return_value(current, regs));
> -
> -	if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
> -		report_syscall_exit(regs);
> -
> -	rseq_syscall(regs);
> -}
> -
>  /*
>   * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a.
>   * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is
> diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> index 4a77f4976e11..2982f6db6d96 100644
> --- a/arch/arm64/kernel/signal.c
> +++ b/arch/arm64/kernel/signal.c
> @@ -19,6 +19,7 @@
>  #include <linux/ratelimit.h>
>  #include <linux/rseq.h>
>  #include <linux/syscalls.h>
> +#include <linux/entry-common.h>
>  
>  #include <asm/daifflags.h>
>  #include <asm/debug-monitors.h>
> @@ -1266,7 +1267,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
>   * the kernel can handle, and then we build all the user-level signal handling
>   * stack-frames in one go after that.
>   */
> -void do_signal(struct pt_regs *regs)
> +void arch_do_signal_or_restart(struct pt_regs *regs)
>  {
>  	unsigned long continue_addr = 0, restart_addr = 0;
>  	int retval = 0;
> diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
> index ad198262b981..160ac9d15c27 100644
> --- a/arch/arm64/kernel/syscall.c
> +++ b/arch/arm64/kernel/syscall.c
> @@ -7,6 +7,7 @@
>  #include <linux/ptrace.h>
>  #include <linux/randomize_kstack.h>
>  #include <linux/syscalls.h>
> +#include <linux/entry-common.h>
>  
>  #include <asm/debug-monitors.h>
>  #include <asm/exception.h>
> @@ -66,14 +67,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
>  	choose_random_kstack_offset(get_random_u16() & 0x1FF);
>  }
>  
> -static inline bool has_syscall_work(unsigned long flags)
> +static inline bool has_syscall_work(unsigned long work)
>  {
> -	return unlikely(flags & _TIF_SYSCALL_WORK);
> +	return unlikely(work & SYSCALL_WORK_ENTER);
>  }
>  
>  static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>  			   const syscall_fn_t syscall_table[])
>  {
> +	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
>  	unsigned long flags = read_thread_flags();
>  
>  	regs->orig_x0 = regs->regs[0];
> @@ -107,7 +109,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>  		return;
>  	}
>  
> -	if (has_syscall_work(flags)) {
> +	if (has_syscall_work(work)) {
>  		/*
>  		 * The de-facto standard way to skip a system call using ptrace
>  		 * is to set the system call to -1 (NO_SYSCALL) and set x0 to a
> @@ -125,7 +127,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>  		 */
>  		if (scno == NO_SYSCALL)
>  			syscall_set_return_value(current, regs, -ENOSYS, 0);
> -		scno = syscall_trace_enter(regs);
> +		scno = syscall_trace_enter(regs, regs->syscallno, work);
>  		if (scno == NO_SYSCALL)
>  			goto trace_exit;
>  	}
> @@ -137,14 +139,14 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
>  	 * check again. However, if we were tracing entry, then we always trace
>  	 * exit regardless, as the old entry assembly did.
>  	 */
> -	if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> -		flags = read_thread_flags();
> -		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
> +	if (!has_syscall_work(work) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> +		work = READ_ONCE(current_thread_info()->syscall_work);
> +		if (!has_syscall_work(work) && !report_single_step(work))
>  			return;
>  	}
>  
>  trace_exit:
> -	syscall_trace_exit(regs);
> +	syscall_exit_work(regs, work);
>  }
>  
>  void do_el0_svc(struct pt_regs *regs)
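
To make the hook refactoring above easier to follow, here are two short sketches of how the generic entry layer is expected to drive the new arm64 hooks. The hook names and semantics are taken from the patch itself; the surrounding generic-entry bodies are illustrative approximations of kernel/entry/common.c as modified earlier in this series, not the literal code.

On the syscall-entry path, arch_prepare_report_syscall_entry() saves and clobbers x7 (r12 for a compat task) before ptrace_report_syscall_entry(), and arch_post_report_syscall_entry() restores it, calling forget_syscall() if the tracer aborted the syscall:

/* Sketch only: approximate shape of the generic syscall_trace_enter(). */
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
				unsigned long work)
{
	long ret = 0;

	if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
		/* arm64 hook: save x7/r12 and signal an entry stop */
		unsigned long saved = arch_prepare_report_syscall_entry(regs);

		ret = ptrace_report_syscall_entry(regs);
		/* arm64 hook: forget_syscall() on failure, restore x7/r12 */
		arch_post_report_syscall_entry(regs, saved, ret);
		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
			return -1L; /* == NO_SYSCALL, skips the syscall */
	}

	/* secure_computing(), trace_sys_enter() and audit follow here, */
	/* any of which may set ret or change the syscall number. */

	return ret ? -1L : syscall;
}

On the irq-exit path, the PREEMPT_DYNAMIC handling and arm64_preempt_schedule_irq() go away because the generic resched helper can ask the architecture whether preemption is currently safe:

/*
 * Sketch only: raw_irqentry_exit_cond_resched() with the new hook,
 * assuming arch_irqentry_exit_need_resched() defaults to true for
 * architectures that do not override it.
 */
void raw_irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		rcu_irq_exit_check_preempt();
		if (need_resched() && arch_irqentry_exit_need_resched())
			preempt_schedule_irq();
	}
}
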
Mark Rutland Sept. 18, 2024, 8:04 a.m. UTC | #6
On Wed, Sep 18, 2024 at 09:59:19AM +0800, Jinjie Ruan wrote:
> Hi, Catalin, Will and Mark, do you have any good suggestions and ideas?

Hi,

Sorry, this is on my queue of things to review, but I haven't had the
chance to review this in detail yet. This needs a thorough review for
things like noinstr safety, and I intend to take a look at this after
Linux Plumbers Conference.

Mark.

> 
> On 2024/6/29 16:56, Jinjie Ruan wrote:
> > [...]
> >  }
> >  
> >  static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_fpsimd_exc(esr, regs);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_el0_sys(esr, regs);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
> > @@ -646,50 +422,50 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
> >  	if (!is_ttbr0_addr(instruction_pointer(regs)))
> >  		arm64_apply_bp_hardening();
> >  
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_sp_pc_abort(far, esr, regs);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_sp_pc_abort(regs->sp, esr, regs);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_el0_undef(regs, esr);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_bti(struct pt_regs *regs)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_el0_bti(regs);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_el0_mops(regs, esr);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	bad_el0_sync(regs, 0, esr);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
> > @@ -697,28 +473,28 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
> >  	/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
> >  	unsigned long far = read_sysreg(far_el1);
> >  
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	do_debug_exception(far, esr, regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_svc(struct pt_regs *regs)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	cortex_a76_erratum_1463225_svc_handler();
> >  	fp_user_discard();
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_el0_svc(regs);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_el0_fpac(regs, esr);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
> > @@ -783,7 +559,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
> >  static void noinstr el0_interrupt(struct pt_regs *regs,
> >  				  void (*handler)(struct pt_regs *))
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  
> >  	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
> >  
> > @@ -794,7 +570,7 @@ static void noinstr el0_interrupt(struct pt_regs *regs,
> >  	do_interrupt_handler(regs, handler);
> >  	irq_exit_rcu();
> >  
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
> > @@ -820,14 +596,15 @@ asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
> >  static void noinstr __el0_error_handler_common(struct pt_regs *regs)
> >  {
> >  	unsigned long esr = read_sysreg(esr_el1);
> > +	irqentry_state_t state_nmi;
> >  
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_ERRCTX);
> > -	arm64_enter_nmi(regs);
> > +	state_nmi = irqentry_nmi_enter(regs);
> >  	do_serror(regs, esr);
> > -	arm64_exit_nmi(regs);
> > +	irqentry_nmi_exit(regs, state_nmi);
> >  	local_daif_restore(DAIF_PROCCTX);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
> > @@ -838,19 +615,19 @@ asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
> >  #ifdef CONFIG_COMPAT
> >  static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_el0_cp15(esr, regs);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  static void noinstr el0_svc_compat(struct pt_regs *regs)
> >  {
> > -	enter_from_user_mode(regs);
> > +	irqentry_enter_from_user_mode(regs);
> >  	cortex_a76_erratum_1463225_svc_handler();
> >  	local_daif_restore(DAIF_PROCCTX);
> >  	do_el0_svc_compat(regs);
> > -	exit_to_user_mode(regs);
> > +	exit_to_user_mode_wrapper(regs);
> >  }
> >  
> >  asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
> > @@ -924,7 +701,7 @@ asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
> >  	unsigned long esr = read_sysreg(esr_el1);
> >  	unsigned long far = read_sysreg(far_el1);
> >  
> > -	arm64_enter_nmi(regs);
> > +	irqentry_nmi_enter(regs);
> >  	panic_bad_stack(regs, esr, far);
> >  }
> >  #endif /* CONFIG_VMAP_STACK */
> > @@ -933,6 +710,7 @@ asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
> >  asmlinkage noinstr unsigned long
> >  __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
> >  {
> > +	irqentry_state_t state;
> >  	unsigned long ret;
> >  
> >  	/*
> > @@ -957,9 +735,9 @@ __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
> >  	else if (cpu_has_pan())
> >  		set_pstate_pan(0);
> >  
> > -	arm64_enter_nmi(regs);
> > +	state = irqentry_nmi_enter(regs);
> >  	ret = do_sdei_event(regs, arg);
> > -	arm64_exit_nmi(regs);
> > +	irqentry_nmi_exit(regs, state);
> >  
> >  	return ret;
> >  }
> > diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> > index 60fd85d5119d..9cc0f450fa76 100644
> > --- a/arch/arm64/kernel/ptrace.c
> > +++ b/arch/arm64/kernel/ptrace.c
> > @@ -41,9 +41,6 @@
> >  #include <asm/traps.h>
> >  #include <asm/system_misc.h>
> >  
> > -#define CREATE_TRACE_POINTS
> > -#include <trace/events/syscalls.h>
> > -
> >  struct pt_regs_offset {
> >  	const char *name;
> >  	int offset;
> > @@ -2179,104 +2176,6 @@ long arch_ptrace(struct task_struct *child, long request,
> >  	return ptrace_request(child, request, addr, data);
> >  }
> >  
> > -enum ptrace_syscall_dir {
> > -	PTRACE_SYSCALL_ENTER = 0,
> > -	PTRACE_SYSCALL_EXIT,
> > -};
> > -
> > -static void report_syscall_enter(struct pt_regs *regs)
> > -{
> > -	int regno;
> > -	unsigned long saved_reg;
> > -
> > -	/*
> > -	 * We have some ABI weirdness here in the way that we handle syscall
> > -	 * exit stops because we indicate whether or not the stop has been
> > -	 * signalled from syscall entry or syscall exit by clobbering a general
> > -	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
> > -	 * and restoring its old value after the stop. This means that:
> > -	 *
> > -	 * - Any writes by the tracer to this register during the stop are
> > -	 *   ignored/discarded.
> > -	 *
> > -	 * - The actual value of the register is not available during the stop,
> > -	 *   so the tracer cannot save it and restore it later.
> > -	 *
> > -	 * - Syscall stops behave differently to seccomp and pseudo-step traps
> > -	 *   (the latter do not nobble any registers).
> > -	 */
> > -	regno = (is_compat_task() ? 12 : 7);
> > -	saved_reg = regs->regs[regno];
> > -	regs->regs[regno] = PTRACE_SYSCALL_ENTER;
> > -
> > -	if (ptrace_report_syscall_entry(regs))
> > -		forget_syscall(regs);
> > -	regs->regs[regno] = saved_reg;
> > -}
> > -
> > -static void report_syscall_exit(struct pt_regs *regs)
> > -{
> > -	int regno;
> > -	unsigned long saved_reg;
> > -
> > -	/* See comment for report_syscall_enter() */
> > -	regno = (is_compat_task() ? 12 : 7);
> > -	saved_reg = regs->regs[regno];
> > -	regs->regs[regno] = PTRACE_SYSCALL_EXIT;
> > -
> > -	if (!test_thread_flag(TIF_SINGLESTEP)) {
> > -		ptrace_report_syscall_exit(regs, 0);
> > -		regs->regs[regno] = saved_reg;
> > -	} else {
> > -		regs->regs[regno] = saved_reg;
> > -
> > -		/*
> > -		 * Signal a pseudo-step exception since we are stepping but
> > -		 * tracer modifications to the registers may have rewound the
> > -		 * state machine.
> > -		 */
> > -		ptrace_report_syscall_exit(regs, 1);
> > -	}
> > -}
> > -
> > -int syscall_trace_enter(struct pt_regs *regs)
> > -{
> > -	unsigned long flags = read_thread_flags();
> > -
> > -	if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
> > -		report_syscall_enter(regs);
> > -		if (flags & _TIF_SYSCALL_EMU)
> > -			return NO_SYSCALL;
> > -	}
> > -
> > -	/* Do the secure computing after ptrace; failures should be fast. */
> > -	if (secure_computing() == -1)
> > -		return NO_SYSCALL;
> > -
> > -	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
> > -		trace_sys_enter(regs, regs->syscallno);
> > -
> > -	audit_syscall_entry(regs->syscallno, regs->orig_x0, regs->regs[1],
> > -			    regs->regs[2], regs->regs[3]);
> > -
> > -	return regs->syscallno;
> > -}
> > -
> > -void syscall_trace_exit(struct pt_regs *regs)
> > -{
> > -	unsigned long flags = read_thread_flags();
> > -
> > -	audit_syscall_exit(regs);
> > -
> > -	if (flags & _TIF_SYSCALL_TRACEPOINT)
> > -		trace_sys_exit(regs, syscall_get_return_value(current, regs));
> > -
> > -	if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
> > -		report_syscall_exit(regs);
> > -
> > -	rseq_syscall(regs);
> > -}
> > -
> >  /*
> >   * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a.
> >   * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is
> > diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> > index 4a77f4976e11..2982f6db6d96 100644
> > --- a/arch/arm64/kernel/signal.c
> > +++ b/arch/arm64/kernel/signal.c
> > @@ -19,6 +19,7 @@
> >  #include <linux/ratelimit.h>
> >  #include <linux/rseq.h>
> >  #include <linux/syscalls.h>
> > +#include <linux/entry-common.h>
> >  
> >  #include <asm/daifflags.h>
> >  #include <asm/debug-monitors.h>
> > @@ -1266,7 +1267,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
> >   * the kernel can handle, and then we build all the user-level signal handling
> >   * stack-frames in one go after that.
> >   */
> > -void do_signal(struct pt_regs *regs)
> > +void arch_do_signal_or_restart(struct pt_regs *regs)
> >  {
> >  	unsigned long continue_addr = 0, restart_addr = 0;
> >  	int retval = 0;
> > diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
> > index ad198262b981..160ac9d15c27 100644
> > --- a/arch/arm64/kernel/syscall.c
> > +++ b/arch/arm64/kernel/syscall.c
> > @@ -7,6 +7,7 @@
> >  #include <linux/ptrace.h>
> >  #include <linux/randomize_kstack.h>
> >  #include <linux/syscalls.h>
> > +#include <linux/entry-common.h>
> >  
> >  #include <asm/debug-monitors.h>
> >  #include <asm/exception.h>
> > @@ -66,14 +67,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
> >  	choose_random_kstack_offset(get_random_u16() & 0x1FF);
> >  }
> >  
> > -static inline bool has_syscall_work(unsigned long flags)
> > +static inline bool has_syscall_work(unsigned long work)
> >  {
> > -	return unlikely(flags & _TIF_SYSCALL_WORK);
> > +	return unlikely(work & SYSCALL_WORK_ENTER);
> >  }
> >  
> >  static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
> >  			   const syscall_fn_t syscall_table[])
> >  {
> > +	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
> >  	unsigned long flags = read_thread_flags();
> >  
> >  	regs->orig_x0 = regs->regs[0];
> > @@ -107,7 +109,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
> >  		return;
> >  	}
> >  
> > -	if (has_syscall_work(flags)) {
> > +	if (has_syscall_work(work)) {
> >  		/*
> >  		 * The de-facto standard way to skip a system call using ptrace
> >  		 * is to set the system call to -1 (NO_SYSCALL) and set x0 to a
> > @@ -125,7 +127,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
> >  		 */
> >  		if (scno == NO_SYSCALL)
> >  			syscall_set_return_value(current, regs, -ENOSYS, 0);
> > -		scno = syscall_trace_enter(regs);
> > +		scno = syscall_trace_enter(regs, regs->syscallno, work);
> >  		if (scno == NO_SYSCALL)
> >  			goto trace_exit;
> >  	}
> > @@ -137,14 +139,14 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
> >  	 * check again. However, if we were tracing entry, then we always trace
> >  	 * exit regardless, as the old entry assembly did.
> >  	 */
> > -	if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> > -		flags = read_thread_flags();
> > -		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
> > +	if (!has_syscall_work(work) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> > +		work = READ_ONCE(current_thread_info()->syscall_work);
> > +		if (!has_syscall_work(work) && !report_single_step(work))
> >  			return;
> >  	}
> >  
> >  trace_exit:
> > -	syscall_trace_exit(regs);
> > +	syscall_exit_work(regs, work);
> >  }
> >  
> >  void do_el0_svc(struct pt_regs *regs)

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d91259ee7b5..e6ccc5ea06fe 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -138,6 +138,7 @@  config ARM64
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_ENTRY
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IOREMAP
 	select GENERIC_IRQ_IPI
diff --git a/arch/arm64/include/asm/entry-common.h b/arch/arm64/include/asm/entry-common.h
new file mode 100644
index 000000000000..a14d62b2eb69
--- /dev/null
+++ b/arch/arm64/include/asm/entry-common.h
@@ -0,0 +1,172 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_ARM64_ENTRY_COMMON_H
+#define _ASM_ARM64_ENTRY_COMMON_H
+
+#include <linux/thread_info.h>
+
+#include <asm/daifflags.h>
+#include <asm/fpsimd.h>
+#include <asm/mte.h>
+#include <asm/stacktrace.h>
+
+enum ptrace_syscall_dir {
+	PTRACE_SYSCALL_ENTER = 0,
+	PTRACE_SYSCALL_EXIT,
+};
+
+#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_MTE_ASYNC_FAULT | _TIF_FOREIGN_FPSTATE)
+
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
+{
+	mte_disable_tco_entry(current);
+}
+
+#define arch_enter_from_user_mode arch_enter_from_user_mode
+
+static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
+							unsigned long ti_work)
+{
+	if (ti_work & _TIF_MTE_ASYNC_FAULT) {
+		clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+		send_sig_fault(SIGSEGV, SEGV_MTEAERR, (void __user *)NULL, current);
+	}
+
+	if (ti_work & _TIF_FOREIGN_FPSTATE)
+		fpsimd_restore_current_state();
+}
+
+#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+						  unsigned long ti_work)
+{
+	local_daif_mask();
+	mte_check_tfsr_exit();
+}
+
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
+
+static inline void arch_enter_from_kernel_mode(struct pt_regs *regs)
+{
+	mte_check_tfsr_entry();
+	mte_disable_tco_entry(current);
+}
+
+#define arch_enter_from_kernel_mode arch_enter_from_kernel_mode
+
+static inline void arch_exit_to_kernel_mode_prepare(struct pt_regs *regs)
+{
+	mte_check_tfsr_exit();
+}
+
+#define arch_exit_to_kernel_mode_prepare arch_exit_to_kernel_mode_prepare
+
+static inline bool arch_irqentry_exit_need_resched(void)
+{
+	/*
+	 * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
+	 * priority masking is used the GIC irqchip driver will clear DAIF.IF
+	 * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
+	 * DAIF we must have handled an NMI, so skip preemption.
+	 */
+	if (system_uses_irq_prio_masking() && read_sysreg(daif))
+		return false;
+
+	/*
+	 * Preempting a task from an IRQ means we leave copies of PSTATE
+	 * on the stack. cpufeature's enable calls may modify PSTATE, but
+	 * resuming one of these preempted tasks would undo those changes.
+	 *
+	 * Only allow a task to be preempted once cpufeatures have been
+	 * enabled.
+	 */
+	if (!system_capabilities_finalized())
+		return false;
+
+	return true;
+}
+
+#define arch_irqentry_exit_need_resched arch_irqentry_exit_need_resched
+
+static inline unsigned long arch_prepare_report_syscall_entry(struct pt_regs *regs)
+{
+	unsigned long saved_reg;
+	int regno;
+
+	/*
+	 * We have some ABI weirdness here in the way that we handle syscall
+	 * exit stops because we indicate whether or not the stop has been
+	 * signalled from syscall entry or syscall exit by clobbering a general
+	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
+	 * and restoring its old value after the stop. This means that:
+	 *
+	 * - Any writes by the tracer to this register during the stop are
+	 *   ignored/discarded.
+	 *
+	 * - The actual value of the register is not available during the stop,
+	 *   so the tracer cannot save it and restore it later.
+	 *
+	 * - Syscall stops behave differently to seccomp and pseudo-step traps
+	 *   (the latter do not nobble any registers).
+	 */
+	regno = (is_compat_task() ? 12 : 7);
+	saved_reg = regs->regs[regno];
+	regs->regs[regno] = PTRACE_SYSCALL_ENTER;
+
+	return saved_reg;
+}
+
+#define arch_prepare_report_syscall_entry arch_prepare_report_syscall_entry
+
+static inline void arch_post_report_syscall_entry(struct pt_regs *regs,
+						  unsigned long saved_reg, long ret)
+{
+	int regno = (is_compat_task() ? 12 : 7);
+
+	if (ret)
+		forget_syscall(regs);
+
+	regs->regs[regno] = saved_reg;
+}
+
+#define arch_post_report_syscall_entry arch_post_report_syscall_entry
+
+static inline unsigned long arch_prepare_report_syscall_exit(struct pt_regs *regs,
+							     unsigned long work)
+{
+	unsigned long saved_reg;
+	int regno;
+
+	/* See comment for arch_prepare_report_syscall_entry() */
+	regno = (is_compat_task() ? 12 : 7);
+	saved_reg = regs->regs[regno];
+	regs->regs[regno] = PTRACE_SYSCALL_EXIT;
+
+	if (report_single_step(work)) {
+		/*
+		 * Signal a pseudo-step exception since we are stepping but
+		 * tracer modifications to the registers may have rewound the
+		 * state machine.
+		 */
+		regs->regs[regno] = saved_reg;
+	}
+
+	return saved_reg;
+}
+
+#define arch_prepare_report_syscall_exit arch_prepare_report_syscall_exit
+
+static inline void arch_post_report_syscall_exit(struct pt_regs *regs,
+						 unsigned long saved_reg,
+						 unsigned long work)
+{
+	int regno = (is_compat_task() ? 12 : 7);
+
+	if (!report_single_step(work))
+		regs->regs[regno] = saved_reg;
+}
+
+#define arch_post_report_syscall_exit arch_post_report_syscall_exit
+
+#endif /* _ASM_ARM64_ENTRY_COMMON_H */
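For context, the arch_prepare/post_report_syscall_*() hooks above are driven by the generic ptrace reporting path that patches 1-2 of this series carve out. A simplified sketch of the expected call sequence (not the verbatim kernel/entry/common.c code) is:

/*
 * Simplified sketch, assuming the hook names from patches 1-2 of this
 * series: how the refactored report path drives the hooks above.
 */
static void report_syscall_entry(struct pt_regs *regs)
{
	unsigned long saved_reg;
	long ret;

	/* arm64: stash x7 (r12 for compat) and write PTRACE_SYSCALL_ENTER */
	saved_reg = arch_prepare_report_syscall_entry(regs);
	ret = ptrace_report_syscall_entry(regs);
	/* arm64: forget_syscall() on failure, then restore the register */
	arch_post_report_syscall_entry(regs, saved_reg, ret);
}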
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index ab8e14b96f68..9891b15da4c3 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -85,7 +85,9 @@  static inline int syscall_get_arch(struct task_struct *task)
 	return AUDIT_ARCH_AARCH64;
 }
 
-int syscall_trace_enter(struct pt_regs *regs);
-void syscall_trace_exit(struct pt_regs *regs);
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+	return false;
+}
 
 #endif	/* __ASM_SYSCALL_H */
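arch_syscall_is_vdso_sigreturn() is consulted by the generic syscall user dispatch code to let a vDSO-based sigreturn bypass dispatch; arm64 has no such vDSO trampoline, hence the unconditional false. Roughly, in kernel/entry/syscall_user_dispatch.c:

	/* sd is the task's configured syscall_user_dispatch range */
	if (likely(instruction_pointer(regs) - sd->offset < sd->len))
		return false;

	/* vDSO sigreturn must always be allowed to issue syscalls */
	if (unlikely(arch_syscall_is_vdso_sigreturn(regs)))
		return false;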
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e72a3bf9e563..ec5d74c53bf9 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -43,6 +43,7 @@  struct thread_info {
 	void			*scs_sp;
 #endif
 	u32			cpu;
+	unsigned long		syscall_work;   /* SYSCALL_WORK_ flags */
 };
 
 #define thread_saved_pc(tsk)	\
@@ -64,11 +65,6 @@  void arch_setup_new_exec(void);
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_MTE_ASYNC_FAULT	5	/* MTE Asynchronous Tag Check Fault */
 #define TIF_NOTIFY_SIGNAL	6	/* signal notifications exist */
-#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
-#define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
-#define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
-#define TIF_SECCOMP		11	/* syscall secure computing */
-#define TIF_SYSCALL_EMU		12	/* syscall emulation active */
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_FREEZE		19
 #define TIF_RESTORE_SIGMASK	20
@@ -86,27 +82,12 @@  void arch_setup_new_exec(void);
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
-#define _TIF_SECCOMP		(1 << TIF_SECCOMP)
-#define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
-#define _TIF_UPROBE		(1 << TIF_UPROBE)
-#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
+#define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_SVE		(1 << TIF_SVE)
 #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 
-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
-				 _TIF_NOTIFY_SIGNAL)
-
-#define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
-				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
-				 _TIF_SYSCALL_EMU)
-
 #ifdef CONFIG_SHADOW_CALL_STACK
 #define INIT_SCS							\
 	.scs_base	= init_shadow_call_stack,			\
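With GENERIC_ENTRY selected, the per-task syscall tracing state that used to live in the TIF_SYSCALL_* flags above is kept in thread_info::syscall_work and manipulated through the generic helpers from <linux/thread_info.h>. Illustrative only; the example_* wrappers are hypothetical, the set/test helpers are the generic ones:

static inline void example_enable_syscall_trace(struct task_struct *t)
{
	/* was: set_tsk_thread_flag(t, TIF_SYSCALL_TRACE) */
	set_task_syscall_work(t, SYSCALL_TRACE);
}

static inline bool example_current_syscall_traced(void)
{
	/* was: test_thread_flag(TIF_SYSCALL_TRACE) */
	return test_syscall_work(SYSCALL_TRACE);
}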
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index b77a15955f28..ac61aeb93933 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -14,6 +14,7 @@ 
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/thread_info.h>
+#include <linux/entry-common.h>
 
 #include <asm/cpufeature.h>
 #include <asm/daifflags.h>
@@ -28,201 +29,15 @@ 
 #include <asm/sysreg.h>
 #include <asm/system_misc.h>
 
-/*
- * Handle IRQ/context state management when entering from kernel mode.
- * Before this function is called it is not safe to call regular kernel code,
- * instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_enter(), handling the kernel
- * mode transitions only.
- */
-static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
-{
-	regs->exit_rcu = false;
-
-	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
-		lockdep_hardirqs_off(CALLER_ADDR0);
-		ct_irq_enter();
-		trace_hardirqs_off_finish();
-
-		regs->exit_rcu = true;
-		return;
-	}
-
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	rcu_irq_enter_check_tick();
-	trace_hardirqs_off_finish();
-}
-
-static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
-{
-	__enter_from_kernel_mode(regs);
-	mte_check_tfsr_entry();
-	mte_disable_tco_entry(current);
-}
-
-/*
- * Handle IRQ/context state management when exiting to kernel mode.
- * After this function returns it is not safe to call regular kernel code,
- * instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_exit(), handling the kernel
- * mode transitions only, and with preemption handled elsewhere.
- */
-static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
-{
-	lockdep_assert_irqs_disabled();
-
-	if (interrupts_enabled(regs)) {
-		if (regs->exit_rcu) {
-			trace_hardirqs_on_prepare();
-			lockdep_hardirqs_on_prepare();
-			ct_irq_exit();
-			lockdep_hardirqs_on(CALLER_ADDR0);
-			return;
-		}
-
-		trace_hardirqs_on();
-	} else {
-		if (regs->exit_rcu)
-			ct_irq_exit();
-	}
-}
-
-static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
-{
-	mte_check_tfsr_exit();
-	__exit_to_kernel_mode(regs);
-}
-
-/*
- * Handle IRQ/context state management when entering from user mode.
- * Before this function is called it is not safe to call regular kernel code,
- * instrumentable code, or any code which may trigger an exception.
- */
-static __always_inline void __enter_from_user_mode(void)
+static __always_inline void exit_to_user_mode_wrapper(struct pt_regs *regs)
 {
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	CT_WARN_ON(ct_state() != CONTEXT_USER);
-	user_exit_irqoff();
-	trace_hardirqs_off_finish();
-	mte_disable_tco_entry(current);
-}
-
-static __always_inline void enter_from_user_mode(struct pt_regs *regs)
-{
-	__enter_from_user_mode();
-}
-
-/*
- * Handle IRQ/context state management when exiting to user mode.
- * After this function returns it is not safe to call regular kernel code,
- * instrumentable code, or any code which may trigger an exception.
- */
-static __always_inline void __exit_to_user_mode(void)
-{
-	trace_hardirqs_on_prepare();
-	lockdep_hardirqs_on_prepare();
-	user_enter_irqoff();
-	lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
-static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
-{
-	do {
-		local_irq_enable();
-
-		if (thread_flags & _TIF_NEED_RESCHED)
-			schedule();
-
-		if (thread_flags & _TIF_UPROBE)
-			uprobe_notify_resume(regs);
-
-		if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
-			clear_thread_flag(TIF_MTE_ASYNC_FAULT);
-			send_sig_fault(SIGSEGV, SEGV_MTEAERR,
-				       (void __user *)NULL, current);
-		}
-
-		if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
-			do_signal(regs);
-
-		if (thread_flags & _TIF_NOTIFY_RESUME)
-			resume_user_mode_work(regs);
-
-		if (thread_flags & _TIF_FOREIGN_FPSTATE)
-			fpsimd_restore_current_state();
-
-		local_irq_disable();
-		thread_flags = read_thread_flags();
-	} while (thread_flags & _TIF_WORK_MASK);
-}
-
-static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
-{
-	unsigned long flags;
-
 	local_irq_disable();
-
-	flags = read_thread_flags();
-	if (unlikely(flags & _TIF_WORK_MASK))
-		do_notify_resume(regs, flags);
-
-	local_daif_mask();
-
-	lockdep_sys_exit();
-}
-
-static __always_inline void exit_to_user_mode(struct pt_regs *regs)
-{
-	exit_to_user_mode_prepare(regs);
-	mte_check_tfsr_exit();
-	__exit_to_user_mode();
+	irqentry_exit_to_user_mode(regs);
 }
 
 asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
 {
-	exit_to_user_mode(regs);
-}
-
-/*
- * Handle IRQ/context state management when entering an NMI from user/kernel
- * mode. Before this function is called it is not safe to call regular kernel
- * code, instrumentable code, or any code which may trigger an exception.
- */
-static void noinstr arm64_enter_nmi(struct pt_regs *regs)
-{
-	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
-
-	__nmi_enter();
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	lockdep_hardirq_enter();
-	ct_nmi_enter();
-
-	trace_hardirqs_off_finish();
-	ftrace_nmi_enter();
-}
-
-/*
- * Handle IRQ/context state management when exiting an NMI from user/kernel
- * mode. After this function returns it is not safe to call regular kernel
- * code, instrumentable code, or any code which may trigger an exception.
- */
-static void noinstr arm64_exit_nmi(struct pt_regs *regs)
-{
-	bool restore = regs->lockdep_hardirqs;
-
-	ftrace_nmi_exit();
-	if (restore) {
-		trace_hardirqs_on_prepare();
-		lockdep_hardirqs_on_prepare();
-	}
-
-	ct_nmi_exit();
-	lockdep_hardirq_exit();
-	if (restore)
-		lockdep_hardirqs_on(CALLER_ADDR0);
-	__nmi_exit();
+	exit_to_user_mode_wrapper(regs);
 }
 
 /*
@@ -259,48 +74,6 @@  static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
 		lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
-#ifdef CONFIG_PREEMPT_DYNAMIC
-DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
-#define need_irq_preemption() \
-	(static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
-#else
-#define need_irq_preemption()	(IS_ENABLED(CONFIG_PREEMPTION))
-#endif
-
-static void __sched arm64_preempt_schedule_irq(void)
-{
-	if (!need_irq_preemption())
-		return;
-
-	/*
-	 * Note: thread_info::preempt_count includes both thread_info::count
-	 * and thread_info::need_resched, and is not equivalent to
-	 * preempt_count().
-	 */
-	if (READ_ONCE(current_thread_info()->preempt_count) != 0)
-		return;
-
-	/*
-	 * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
-	 * priority masking is used the GIC irqchip driver will clear DAIF.IF
-	 * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
-	 * DAIF we must have handled an NMI, so skip preemption.
-	 */
-	if (system_uses_irq_prio_masking() && read_sysreg(daif))
-		return;
-
-	/*
-	 * Preempting a task from an IRQ means we leave copies of PSTATE
-	 * on the stack. cpufeature's enable calls may modify PSTATE, but
-	 * resuming one of these preempted tasks would undo those changes.
-	 *
-	 * Only allow a task to be preempted once cpufeatures have been
-	 * enabled.
-	 */
-	if (system_capabilities_finalized())
-		preempt_schedule_irq();
-}
-
 static void do_interrupt_handler(struct pt_regs *regs,
 				 void (*handler)(struct pt_regs *))
 {
@@ -320,7 +93,7 @@  extern void (*handle_arch_fiq)(struct pt_regs *);
 static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
 				      unsigned long esr)
 {
-	arm64_enter_nmi(regs);
+	irqentry_nmi_enter(regs);
 
 	console_verbose();
 
@@ -426,41 +199,43 @@  UNHANDLED(el1t, 64, error)
 static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
+	irqentry_state_t state = irqentry_enter(regs);
 
-	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
 	do_mem_abort(far, esr, regs);
 	local_daif_mask();
-	exit_to_kernel_mode(regs);
+	irqentry_exit(regs, state);
 }
 
 static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
+	irqentry_state_t state = irqentry_enter(regs);
 
-	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
 	do_sp_pc_abort(far, esr, regs);
 	local_daif_mask();
-	exit_to_kernel_mode(regs);
+	irqentry_exit(regs, state);
 }
 
 static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_kernel_mode(regs);
+	irqentry_state_t state = irqentry_enter(regs);
+
 	local_daif_inherit(regs);
 	do_el1_undef(regs, esr);
 	local_daif_mask();
-	exit_to_kernel_mode(regs);
+	irqentry_exit(regs, state);
 }
 
 static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_kernel_mode(regs);
+	irqentry_state_t state = irqentry_enter(regs);
+
 	local_daif_inherit(regs);
 	do_el1_bti(regs, esr);
 	local_daif_mask();
-	exit_to_kernel_mode(regs);
+	irqentry_exit(regs, state);
 }
 
 static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
@@ -475,11 +250,12 @@  static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
 
 static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_kernel_mode(regs);
+	irqentry_state_t state = irqentry_enter(regs);
+
 	local_daif_inherit(regs);
 	do_el1_fpac(regs, esr);
 	local_daif_mask();
-	exit_to_kernel_mode(regs);
+	irqentry_exit(regs, state);
 }
 
 asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
@@ -522,23 +298,22 @@  asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
 static __always_inline void __el1_pnmi(struct pt_regs *regs,
 				       void (*handler)(struct pt_regs *))
 {
-	arm64_enter_nmi(regs);
+	irqentry_state_t state = irqentry_nmi_enter(regs);
+
 	do_interrupt_handler(regs, handler);
-	arm64_exit_nmi(regs);
+	irqentry_nmi_exit(regs, state);
 }
 
 static __always_inline void __el1_irq(struct pt_regs *regs,
 				      void (*handler)(struct pt_regs *))
 {
-	enter_from_kernel_mode(regs);
+	irqentry_state_t state = irqentry_enter(regs);
 
 	irq_enter_rcu();
 	do_interrupt_handler(regs, handler);
 	irq_exit_rcu();
 
-	arm64_preempt_schedule_irq();
-
-	exit_to_kernel_mode(regs);
+	irqentry_exit(regs, state);
 }
 static void noinstr el1_interrupt(struct pt_regs *regs,
 				  void (*handler)(struct pt_regs *))
@@ -564,21 +339,22 @@  asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
 asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
+	irqentry_state_t state;
 
 	local_daif_restore(DAIF_ERRCTX);
-	arm64_enter_nmi(regs);
+	state = irqentry_nmi_enter(regs);
 	do_serror(regs, esr);
-	arm64_exit_nmi(regs);
+	irqentry_nmi_exit(regs, state);
 }
 
 static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_mem_abort(far, esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
@@ -593,50 +369,50 @@  static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
 	if (!is_ttbr0_addr(far))
 		arm64_apply_bp_hardening();
 
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_mem_abort(far, esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_fpsimd_acc(esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_sve_acc(esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_sme_acc(esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_fpsimd_exc(esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_el0_sys(esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
@@ -646,50 +422,50 @@  static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
 	if (!is_ttbr0_addr(instruction_pointer(regs)))
 		arm64_apply_bp_hardening();
 
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_sp_pc_abort(far, esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_sp_pc_abort(regs->sp, esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_el0_undef(regs, esr);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_bti(struct pt_regs *regs)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_el0_bti(regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_el0_mops(regs, esr);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	bad_el0_sync(regs, 0, esr);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
@@ -697,28 +473,28 @@  static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
 	/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
 	unsigned long far = read_sysreg(far_el1);
 
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	do_debug_exception(far, esr, regs);
 	local_daif_restore(DAIF_PROCCTX);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_svc(struct pt_regs *regs)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	cortex_a76_erratum_1463225_svc_handler();
 	fp_user_discard();
 	local_daif_restore(DAIF_PROCCTX);
 	do_el0_svc(regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_el0_fpac(regs, esr);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
@@ -783,7 +559,7 @@  asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
 static void noinstr el0_interrupt(struct pt_regs *regs,
 				  void (*handler)(struct pt_regs *))
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 
 	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
 
@@ -794,7 +570,7 @@  static void noinstr el0_interrupt(struct pt_regs *regs,
 	do_interrupt_handler(regs, handler);
 	irq_exit_rcu();
 
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
@@ -820,14 +596,15 @@  asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
 static void noinstr __el0_error_handler_common(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
+	irqentry_state_t state_nmi;
 
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_ERRCTX);
-	arm64_enter_nmi(regs);
+	state_nmi = irqentry_nmi_enter(regs);
 	do_serror(regs, esr);
-	arm64_exit_nmi(regs);
+	irqentry_nmi_exit(regs, state_nmi);
 	local_daif_restore(DAIF_PROCCTX);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
@@ -838,19 +615,19 @@  asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
 #ifdef CONFIG_COMPAT
 static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_el0_cp15(esr, regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 static void noinstr el0_svc_compat(struct pt_regs *regs)
 {
-	enter_from_user_mode(regs);
+	irqentry_enter_from_user_mode(regs);
 	cortex_a76_erratum_1463225_svc_handler();
 	local_daif_restore(DAIF_PROCCTX);
 	do_el0_svc_compat(regs);
-	exit_to_user_mode(regs);
+	exit_to_user_mode_wrapper(regs);
 }
 
 asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
@@ -924,7 +701,7 @@  asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
 	unsigned long esr = read_sysreg(esr_el1);
 	unsigned long far = read_sysreg(far_el1);
 
-	arm64_enter_nmi(regs);
+	irqentry_nmi_enter(regs);
 	panic_bad_stack(regs, esr, far);
 }
 #endif /* CONFIG_VMAP_STACK */
@@ -933,6 +710,7 @@  asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
 asmlinkage noinstr unsigned long
 __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
 {
+	irqentry_state_t state;
 	unsigned long ret;
 
 	/*
@@ -957,9 +735,9 @@  __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
 	else if (cpu_has_pan())
 		set_pstate_pan(0);
 
-	arm64_enter_nmi(regs);
+	state = irqentry_nmi_enter(regs);
 	ret = do_sdei_event(regs, arg);
-	arm64_exit_nmi(regs);
+	irqentry_nmi_exit(regs, state);
 
 	return ret;
 }
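The preemption checks removed above do not disappear: arch_irqentry_exit_need_resched(), defined in the new entry-common.h, is called back from the generic preemption point. Condensed from kernel/entry/common.c as modified by this series (DEBUG_ENTRY sanity checks omitted):

void raw_irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		rcu_irq_exit_check_preempt();
		/* the arch hook replaces arm64_preempt_schedule_irq()'s checks */
		if (need_resched() && arch_irqentry_exit_need_resched())
			preempt_schedule_irq();
	}
}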
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 60fd85d5119d..9cc0f450fa76 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -41,9 +41,6 @@ 
 #include <asm/traps.h>
 #include <asm/system_misc.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
 struct pt_regs_offset {
 	const char *name;
 	int offset;
@@ -2179,104 +2176,6 @@  long arch_ptrace(struct task_struct *child, long request,
 	return ptrace_request(child, request, addr, data);
 }
 
-enum ptrace_syscall_dir {
-	PTRACE_SYSCALL_ENTER = 0,
-	PTRACE_SYSCALL_EXIT,
-};
-
-static void report_syscall_enter(struct pt_regs *regs)
-{
-	int regno;
-	unsigned long saved_reg;
-
-	/*
-	 * We have some ABI weirdness here in the way that we handle syscall
-	 * exit stops because we indicate whether or not the stop has been
-	 * signalled from syscall entry or syscall exit by clobbering a general
-	 * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
-	 * and restoring its old value after the stop. This means that:
-	 *
-	 * - Any writes by the tracer to this register during the stop are
-	 *   ignored/discarded.
-	 *
-	 * - The actual value of the register is not available during the stop,
-	 *   so the tracer cannot save it and restore it later.
-	 *
-	 * - Syscall stops behave differently to seccomp and pseudo-step traps
-	 *   (the latter do not nobble any registers).
-	 */
-	regno = (is_compat_task() ? 12 : 7);
-	saved_reg = regs->regs[regno];
-	regs->regs[regno] = PTRACE_SYSCALL_ENTER;
-
-	if (ptrace_report_syscall_entry(regs))
-		forget_syscall(regs);
-	regs->regs[regno] = saved_reg;
-}
-
-static void report_syscall_exit(struct pt_regs *regs)
-{
-	int regno;
-	unsigned long saved_reg;
-
-	/* See comment for report_syscall_enter() */
-	regno = (is_compat_task() ? 12 : 7);
-	saved_reg = regs->regs[regno];
-	regs->regs[regno] = PTRACE_SYSCALL_EXIT;
-
-	if (!test_thread_flag(TIF_SINGLESTEP)) {
-		ptrace_report_syscall_exit(regs, 0);
-		regs->regs[regno] = saved_reg;
-	} else {
-		regs->regs[regno] = saved_reg;
-
-		/*
-		 * Signal a pseudo-step exception since we are stepping but
-		 * tracer modifications to the registers may have rewound the
-		 * state machine.
-		 */
-		ptrace_report_syscall_exit(regs, 1);
-	}
-}
-
-int syscall_trace_enter(struct pt_regs *regs)
-{
-	unsigned long flags = read_thread_flags();
-
-	if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
-		report_syscall_enter(regs);
-		if (flags & _TIF_SYSCALL_EMU)
-			return NO_SYSCALL;
-	}
-
-	/* Do the secure computing after ptrace; failures should be fast. */
-	if (secure_computing() == -1)
-		return NO_SYSCALL;
-
-	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
-		trace_sys_enter(regs, regs->syscallno);
-
-	audit_syscall_entry(regs->syscallno, regs->orig_x0, regs->regs[1],
-			    regs->regs[2], regs->regs[3]);
-
-	return regs->syscallno;
-}
-
-void syscall_trace_exit(struct pt_regs *regs)
-{
-	unsigned long flags = read_thread_flags();
-
-	audit_syscall_exit(regs);
-
-	if (flags & _TIF_SYSCALL_TRACEPOINT)
-		trace_sys_exit(regs, syscall_get_return_value(current, regs));
-
-	if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
-		report_syscall_exit(regs);
-
-	rseq_syscall(regs);
-}
-
 /*
  * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a.
  * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 4a77f4976e11..2982f6db6d96 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -19,6 +19,7 @@ 
 #include <linux/ratelimit.h>
 #include <linux/rseq.h>
 #include <linux/syscalls.h>
+#include <linux/entry-common.h>
 
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
@@ -1266,7 +1267,7 @@  static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
  * the kernel can handle, and then we build all the user-level signal handling
  * stack-frames in one go after that.
  */
-void do_signal(struct pt_regs *regs)
+void arch_do_signal_or_restart(struct pt_regs *regs)
 {
 	unsigned long continue_addr = 0, restart_addr = 0;
 	int retval = 0;
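The rename hooks do_signal() into the generic return-to-user work loop, which also ends up calling the arch_exit_to_user_mode_work() hook from the new entry-common.h. Condensed from kernel/entry/common.c (live-patching work omitted):

static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
					    unsigned long ti_work)
{
	while (ti_work & EXIT_TO_USER_MODE_WORK) {
		local_irq_enable_exit_to_user(ti_work);

		if (ti_work & _TIF_NEED_RESCHED)
			schedule();

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			arch_do_signal_or_restart(regs);	/* was do_signal() */

		if (ti_work & _TIF_NOTIFY_RESUME)
			resume_user_mode_work(regs);

		/* arm64's MTE async fault and FP state handling hooks in here */
		arch_exit_to_user_mode_work(regs, ti_work);

		local_irq_disable_exit_to_user();
		ti_work = read_thread_flags();
	}
	return ti_work;
}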
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index ad198262b981..160ac9d15c27 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -7,6 +7,7 @@ 
 #include <linux/ptrace.h>
 #include <linux/randomize_kstack.h>
 #include <linux/syscalls.h>
+#include <linux/entry-common.h>
 
 #include <asm/debug-monitors.h>
 #include <asm/exception.h>
@@ -66,14 +67,15 @@  static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
 	choose_random_kstack_offset(get_random_u16() & 0x1FF);
 }
 
-static inline bool has_syscall_work(unsigned long flags)
+static inline bool has_syscall_work(unsigned long work)
 {
-	return unlikely(flags & _TIF_SYSCALL_WORK);
+	return unlikely(work & SYSCALL_WORK_ENTER);
 }
 
 static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 			   const syscall_fn_t syscall_table[])
 {
+	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
 	unsigned long flags = read_thread_flags();
 
 	regs->orig_x0 = regs->regs[0];
@@ -107,7 +109,7 @@  static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 		return;
 	}
 
-	if (has_syscall_work(flags)) {
+	if (has_syscall_work(work)) {
 		/*
 		 * The de-facto standard way to skip a system call using ptrace
 		 * is to set the system call to -1 (NO_SYSCALL) and set x0 to a
@@ -125,7 +127,7 @@  static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 		 */
 		if (scno == NO_SYSCALL)
 			syscall_set_return_value(current, regs, -ENOSYS, 0);
-		scno = syscall_trace_enter(regs);
+		scno = syscall_trace_enter(regs, regs->syscallno, work);
 		if (scno == NO_SYSCALL)
 			goto trace_exit;
 	}
@@ -137,14 +139,14 @@  static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 	 * check again. However, if we were tracing entry, then we always trace
 	 * exit regardless, as the old entry assembly did.
 	 */
-	if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
-		flags = read_thread_flags();
-		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
+	if (!has_syscall_work(work) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
+		work = READ_ONCE(current_thread_info()->syscall_work);
+		if (!has_syscall_work(work) && !report_single_step(work))
 			return;
 	}
 
 trace_exit:
-	syscall_trace_exit(regs);
+	syscall_exit_work(regs, work);
 }
 
 void do_el0_svc(struct pt_regs *regs)
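For reference, syscall_exit_work() — which this series exposes to arch code — covers what the removed syscall_trace_exit() did. Roughly (syscall user dispatch handling omitted; the rseq_syscall() debug check moves into the generic syscall exit path):

void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
	bool step;

	audit_syscall_exit(regs);

	if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
		trace_sys_exit(regs, syscall_get_return_value(current, regs));

	/* single-step reporting uses the pseudo-step trap on arm64 */
	step = report_single_step(work);
	if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
		ptrace_report_syscall_exit(regs, step);
}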