diff mbox series

[v4,4/4] entry: Inline syscall_exit_to_user_mode()

Message ID 20250127-riscv_optimize_entry-v4-4-868cf7702dc9@rivosinc.com (mailing list archive)
State New
Headers show
Series entry: Move ret_from_fork() to C and inline syscall_exit_to_user_mode() | expand

Checks

Context Check Description
bjorn/pre-ci_am success Success
bjorn/build-rv32-defconfig success build-rv32-defconfig
bjorn/build-rv64-clang-allmodconfig success build-rv64-clang-allmodconfig
bjorn/build-rv64-gcc-allmodconfig success build-rv64-gcc-allmodconfig
bjorn/build-rv64-nommu-k210-defconfig success build-rv64-nommu-k210-defconfig
bjorn/build-rv64-nommu-k210-virt success build-rv64-nommu-k210-virt
bjorn/checkpatch success checkpatch
bjorn/dtb-warn-rv64 success dtb-warn-rv64
bjorn/header-inline success header-inline
bjorn/kdoc success kdoc
bjorn/module-param success module-param
bjorn/verify-fixes success verify-fixes
bjorn/verify-signedoff success verify-signedoff

Commit Message

Charlie Jenkins Jan. 28, 2025, 5:33 a.m. UTC
Architectures that use the generic entry code can be optimized by
inlining syscall_exit_to_user_mode().

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 include/linux/entry-common.h | 43 ++++++++++++++++++++++++++++++++++++--
 kernel/entry/common.c        | 49 +-------------------------------------------
 2 files changed, 42 insertions(+), 50 deletions(-)

Comments

kernel test robot Feb. 5, 2025, 8:13 a.m. UTC | #1
Hello,

kernel test robot noticed a 1.9% improvement in stress-ng.seek.ops_per_sec on:


commit: c1bc35dd5bf6c7fa86a936a4fbe3b8d92fbf8641 ("[PATCH v4 4/4] entry: Inline syscall_exit_to_user_mode()")
url: https://github.com/intel-lab-lkp/linux/commits/Charlie-Jenkins/riscv-entry-Convert-ret_from_fork-to-C/20250128-133636
patch link: https://lore.kernel.org/all/20250127-riscv_optimize_entry-v4-4-868cf7702dc9@rivosinc.com/
patch subject: [PATCH v4 4/4] entry: Inline syscall_exit_to_user_mode()

testcase: stress-ng
config: x86_64-rhel-9.4
compiler: gcc-12
test machine: 64 threads 2 sockets Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz (Ice Lake) with 256G memory
parameters:

	nr_threads: 100%
	testtime: 60s
	test: seek
	cpufreq_governor: performance


In addition to that, the commit also has a significant impact on the following tests:

+------------------+--------------------------------------------------------------------------------+
| testcase: change | stress-ng: stress-ng.context.swapcontext_calls_per_sec 1.9% improvement        |
| test machine     | 384 threads 2 sockets Intel(R) Xeon(R) 6972P (Granite Rapids) with 128G memory |
| test parameters  | cpufreq_governor=performance                                                   |
|                  | nr_threads=100%                                                                |
|                  | test=context                                                                   |
|                  | testtime=60s                                                                   |
+------------------+--------------------------------------------------------------------------------+




Details are as below:
-------------------------------------------------------------------------------------------------->


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20250205/202502051555.85ae6844-lkp@intel.com

=========================================================================================
compiler/cpufreq_governor/kconfig/nr_threads/rootfs/tbox_group/test/testcase/testtime:
  gcc-12/performance/x86_64-rhel-9.4/100%/debian-12-x86_64-20240206.cgz/lkp-icl-2sp8/seek/stress-ng/60s

commit: 
  37c1871b51 ("LoongArch: entry: Migrate ret_from_fork() to C")
  c1bc35dd5b ("entry: Inline syscall_exit_to_user_mode()")

37c1871b51766a66 c1bc35dd5bf6c7fa86a936a4fbe 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
    104886 ± 19%     +19.3%     125157 ± 17%  numa-meminfo.node1.Slab
      2583 ± 39%     +75.4%       4531 ± 40%  proc-vmstat.numa_hint_faults_local
    179842            +0.6%     180945        vmstat.system.in
    177.18            -2.6%     172.49        stress-ng.seek.nanosecs_per_seek
 1.223e+09            +1.9%  1.246e+09        stress-ng.seek.ops
  20376380            +1.9%   20771261        stress-ng.seek.ops_per_sec
      1.05 ± 20%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
     13.11 ± 28%    -100.0%       0.00        perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      3.12 ± 21%    -100.0%       0.00        perf-sched.wait_and_delay.avg.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      2785 ± 14%    -100.0%       0.00        perf-sched.wait_and_delay.count.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
    836.20 ± 43%    -100.0%       0.00        perf-sched.wait_and_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      2.07 ± 27%    -100.0%       0.00        perf-sched.wait_time.avg.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
    834.79 ± 44%    -100.0%       0.00        perf-sched.wait_time.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      2.04            +3.4%       2.11        perf-stat.i.MPKI
 3.682e+08            +2.0%  3.754e+08        perf-stat.i.cache-misses
 4.637e+08            +1.8%  4.721e+08        perf-stat.i.cache-references
      1.23            +1.5%       1.25        perf-stat.i.cpi
    603.02            -1.9%     591.60        perf-stat.i.cycles-between-cache-misses
 1.798e+11            -1.4%  1.772e+11        perf-stat.i.instructions
      0.82            -1.4%       0.80        perf-stat.i.ipc
      3902            +1.8%       3972 ±  2%  perf-stat.i.minor-faults
      3902            +1.8%       3972 ±  2%  perf-stat.i.page-faults
      2.05            +3.4%       2.12        perf-stat.overall.MPKI
      1.23            +1.5%       1.25        perf-stat.overall.cpi
    602.25            -1.9%     590.74        perf-stat.overall.cycles-between-cache-misses
      0.81            -1.4%       0.80        perf-stat.overall.ipc
 3.623e+08            +1.9%  3.693e+08        perf-stat.ps.cache-misses
 4.562e+08            +1.8%  4.645e+08        perf-stat.ps.cache-references
 1.769e+11            -1.4%  1.743e+11        perf-stat.ps.instructions
      3826            +1.8%       3893 ±  2%  perf-stat.ps.minor-faults
      3826            +1.8%       3893 ±  2%  perf-stat.ps.page-faults
 1.085e+13            -2.0%  1.063e+13        perf-stat.total.instructions
     10.62 ±  2%      -0.6       10.02 ±  3%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.llseek.stress_run
      9.46 ±  2%      -0.5        8.94 ±  3%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.llseek.stress_run
      0.63            +0.0        0.66 ±  3%  perf-profile.calltrace.cycles-pp.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe.llseek
      1.61            +0.0        1.64        perf-profile.calltrace.cycles-pp.copy_page_from_iter_atomic.generic_perform_write.generic_file_write_iter.vfs_write.ksys_write
      2.78            +0.1        2.85        perf-profile.calltrace.cycles-pp.__filemap_get_folio.simple_write_begin.generic_perform_write.generic_file_write_iter.vfs_write
      2.94            +0.1        3.02        perf-profile.calltrace.cycles-pp.simple_write_begin.generic_perform_write.generic_file_write_iter.vfs_write.ksys_write
      8.58            +0.2        8.77        perf-profile.calltrace.cycles-pp.copy_page_to_iter.filemap_read.vfs_read.ksys_read.do_syscall_64
      8.37            +0.2        8.56        perf-profile.calltrace.cycles-pp._copy_to_iter.copy_page_to_iter.filemap_read.vfs_read.ksys_read
      8.96            +0.2        9.17        perf-profile.calltrace.cycles-pp.folio_unlock.simple_write_end.generic_perform_write.generic_file_write_iter.vfs_write
      9.53            +0.2        9.75        perf-profile.calltrace.cycles-pp.simple_write_end.generic_perform_write.generic_file_write_iter.vfs_write.ksys_write
     12.86            +0.3       13.15        perf-profile.calltrace.cycles-pp.filemap_read.vfs_read.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe
     14.08            +0.3       14.42        perf-profile.calltrace.cycles-pp.vfs_read.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
     15.98            +0.3       16.32        perf-profile.calltrace.cycles-pp.generic_perform_write.generic_file_write_iter.vfs_write.ksys_write.do_syscall_64
     19.18            +0.4       19.55        perf-profile.calltrace.cycles-pp.generic_file_write_iter.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe
     20.30            +0.4       20.67        perf-profile.calltrace.cycles-pp.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
      7.39            -7.4        0.00        perf-profile.children.cycles-pp.syscall_exit_to_user_mode
     54.31            -0.7       53.60        perf-profile.children.cycles-pp.llseek
     56.77            -0.3       56.42        perf-profile.children.cycles-pp.do_syscall_64
     59.25            -0.3       58.95        perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
      0.12 ±  3%      +0.0        0.15 ± 13%  perf-profile.children.cycles-pp.generic_file_read_iter
      1.73            +0.0        1.77        perf-profile.children.cycles-pp.x64_sys_call
      1.97            +0.1        2.02        perf-profile.children.cycles-pp.filemap_get_entry
      2.84            +0.1        2.92        perf-profile.children.cycles-pp.__filemap_get_folio
      2.97            +0.1        3.05        perf-profile.children.cycles-pp.simple_write_begin
      6.98            +0.1        7.09        perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack
      1.96            +0.1        2.08 ±  5%  perf-profile.children.cycles-pp.stress_shim_lseek
      8.92            +0.1        9.06        perf-profile.children.cycles-pp.entry_SYSCALL_64
      8.40            +0.2        8.58        perf-profile.children.cycles-pp._copy_to_iter
      8.61            +0.2        8.80        perf-profile.children.cycles-pp.copy_page_to_iter
      8.97            +0.2        9.19        perf-profile.children.cycles-pp.folio_unlock
      9.57            +0.2        9.80        perf-profile.children.cycles-pp.simple_write_end
     19.10            +0.3       19.38        perf-profile.children.cycles-pp.read
     12.94            +0.3       13.24        perf-profile.children.cycles-pp.filemap_read
     25.30            +0.3       25.62        perf-profile.children.cycles-pp.write
     14.14            +0.3       14.48        perf-profile.children.cycles-pp.vfs_read
     16.12            +0.3       16.47        perf-profile.children.cycles-pp.generic_perform_write
     14.72            +0.4       15.08        perf-profile.children.cycles-pp.ksys_read
     19.25            +0.4       19.62        perf-profile.children.cycles-pp.generic_file_write_iter
     20.95            +0.4       21.33        perf-profile.children.cycles-pp.ksys_write
     20.40            +0.4       20.78        perf-profile.children.cycles-pp.vfs_write
      6.38            -6.4        0.00        perf-profile.self.cycles-pp.syscall_exit_to_user_mode
      0.63            +0.0        0.65        perf-profile.self.cycles-pp.__filemap_get_folio
      2.20            +0.0        2.23        perf-profile.self.cycles-pp.entry_SYSCALL_64
      2.45            +0.0        2.48        perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
      0.97            +0.0        1.00        perf-profile.self.cycles-pp.filemap_read
      1.51            +0.0        1.56        perf-profile.self.cycles-pp.x64_sys_call
      1.54            +0.0        1.59        perf-profile.self.cycles-pp.filemap_get_read_batch
      6.54            +0.1        6.64        perf-profile.self.cycles-pp.llseek
      6.74            +0.1        6.85        perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack
      8.35            +0.2        8.54        perf-profile.self.cycles-pp._copy_to_iter
      8.93            +0.2        9.14        perf-profile.self.cycles-pp.folio_unlock
      3.91            +6.1        9.96        perf-profile.self.cycles-pp.do_syscall_64


***************************************************************************************************
lkp-gnr-2ap2: 384 threads 2 sockets Intel(R) Xeon(R) 6972P (Granite Rapids) with 128G memory
=========================================================================================
compiler/cpufreq_governor/kconfig/nr_threads/rootfs/tbox_group/test/testcase/testtime:
  gcc-12/performance/x86_64-rhel-9.4/100%/debian-12-x86_64-20240206.cgz/lkp-gnr-2ap2/context/stress-ng/60s

commit: 
  37c1871b51 ("LoongArch: entry: Migrate ret_from_fork() to C")
  c1bc35dd5b ("entry: Inline syscall_exit_to_user_mode()")

37c1871b51766a66 c1bc35dd5bf6c7fa86a936a4fbe 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
    933000 ± 10%     +30.5%    1217543 ± 18%  proc-vmstat.pgfree
     40.25 ± 37%     +70.8%      68.75 ± 37%  sched_debug.cpu.nr_uninterruptible.max
 1.063e+08            +1.9%  1.083e+08        stress-ng.context.ops
   1771139            +1.9%    1805148        stress-ng.context.ops_per_sec
   4608060            +1.9%    4696809        stress-ng.context.swapcontext_calls_per_sec
      0.06 ± 24%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      4.53 ± 59%    -100.0%       0.00        perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
    217.64 ± 10%     -17.8%     178.86 ± 17%  perf-sched.wait_and_delay.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
      0.67 ± 83%    -100.0%       0.00        perf-sched.wait_and_delay.avg.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      3262 ±  3%    -100.0%       0.00        perf-sched.wait_and_delay.count.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
    505.60 ± 97%    -100.0%       0.00        perf-sched.wait_and_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
    217.59 ± 10%     -18.1%     178.22 ± 17%  perf-sched.wait_time.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
      0.61 ± 91%    -100.0%       0.00        perf-sched.wait_time.avg.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
    502.72 ± 98%    -100.0%       0.00        perf-sched.wait_time.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
 1.197e+11            -4.4%  1.145e+11        perf-stat.i.branch-instructions
      1.48            +0.1        1.57        perf-stat.i.branch-miss-rate%
 1.761e+09            +1.5%  1.788e+09        perf-stat.i.branch-misses
      2.06            +4.1%       2.15        perf-stat.i.cpi
 6.404e+11            -4.3%  6.129e+11        perf-stat.i.instructions
      0.49            -3.9%       0.47        perf-stat.i.ipc
      1.47            +0.1        1.56        perf-stat.overall.branch-miss-rate%
      2.06            +4.1%       2.15        perf-stat.overall.cpi
      0.48            -3.9%       0.47        perf-stat.overall.ipc
 1.178e+11            -4.4%  1.126e+11        perf-stat.ps.branch-instructions
 1.732e+09            +1.5%  1.758e+09        perf-stat.ps.branch-misses
   6.3e+11            -4.3%  6.029e+11        perf-stat.ps.instructions
 3.849e+13            -3.5%  3.716e+13        perf-stat.total.instructions
      6.12            -6.1        0.00        perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.swapcontext
     33.80            -0.7       33.14        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.swapcontext
     31.62            -0.5       31.12        perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.swapcontext
     90.78            -0.3       90.49        perf-profile.calltrace.cycles-pp.swapcontext
      1.40            -0.1        1.30        perf-profile.calltrace.cycles-pp.syscall_return_via_sysret.swapcontext
      1.44            -0.0        1.40        perf-profile.calltrace.cycles-pp.sigprocmask.__x64_sys_rt_sigprocmask.do_syscall_64.entry_SYSCALL_64_after_hwframe.swapcontext
      0.57            +0.0        0.61        perf-profile.calltrace.cycles-pp.entry_SYSRETQ_unsafe_stack.swapcontext
      0.72            +0.0        0.77        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_safe_stack.swapcontext
      2.21            +0.1        2.28        perf-profile.calltrace.cycles-pp.stress_thread2
      2.20            +0.1        2.28        perf-profile.calltrace.cycles-pp.stress_thread3
      2.15            +0.1        2.24        perf-profile.calltrace.cycles-pp.stress_thread1
      7.38            +0.1        7.48        perf-profile.calltrace.cycles-pp._copy_to_user.__x64_sys_rt_sigprocmask.do_syscall_64.entry_SYSCALL_64_after_hwframe.swapcontext
      8.90            +0.1        9.00        perf-profile.calltrace.cycles-pp._copy_from_user.__x64_sys_rt_sigprocmask.do_syscall_64.entry_SYSCALL_64_after_hwframe.swapcontext
      1.26            +0.1        1.37        perf-profile.calltrace.cycles-pp.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe.swapcontext
     21.14            +0.3       21.49        perf-profile.calltrace.cycles-pp.__x64_sys_rt_sigprocmask.do_syscall_64.entry_SYSCALL_64_after_hwframe.swapcontext
     22.96            +0.5       23.48        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.swapcontext
      6.45            -6.4        0.00        perf-profile.children.cycles-pp.syscall_exit_to_user_mode
     32.36            -0.7       31.64        perf-profile.children.cycles-pp.do_syscall_64
     34.18            -0.7       33.52        perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
     96.11            -0.1       96.00        perf-profile.children.cycles-pp.swapcontext
      1.59            -0.1        1.50        perf-profile.children.cycles-pp.syscall_return_via_sysret
      1.54            -0.0        1.51        perf-profile.children.cycles-pp.sigprocmask
      0.74            +0.1        0.79        perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack
      1.72            +0.1        1.78        perf-profile.children.cycles-pp.stress_thread3
      1.70            +0.1        1.75        perf-profile.children.cycles-pp.stress_thread1
      1.72            +0.1        1.78        perf-profile.children.cycles-pp.stress_thread2
      7.64            +0.1        7.76        perf-profile.children.cycles-pp._copy_to_user
      1.44            +0.1        1.58        perf-profile.children.cycles-pp.x64_sys_call
      9.59            +0.2        9.74        perf-profile.children.cycles-pp._copy_from_user
      7.18            +0.2        7.35        perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack
     12.65            +0.3       12.92        perf-profile.children.cycles-pp.entry_SYSCALL_64
     21.19            +0.3       21.50        perf-profile.children.cycles-pp.__x64_sys_rt_sigprocmask
      5.45            -5.5        0.00        perf-profile.self.cycles-pp.syscall_exit_to_user_mode
      1.59            -0.1        1.50        perf-profile.self.cycles-pp.syscall_return_via_sysret
      1.39            -0.0        1.36        perf-profile.self.cycles-pp.sigprocmask
      2.32            +0.0        2.35        perf-profile.self.cycles-pp.entry_SYSCALL_64
      1.17            +0.0        1.20        perf-profile.self.cycles-pp.stress_thread3
      1.18            +0.0        1.21        perf-profile.self.cycles-pp.stress_thread2
      1.17            +0.0        1.20        perf-profile.self.cycles-pp.stress_thread1
      2.83            +0.0        2.87        perf-profile.self.cycles-pp.__x64_sys_rt_sigprocmask
      2.00            +0.1        2.05        perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
      0.73            +0.1        0.79        perf-profile.self.cycles-pp.entry_SYSCALL_64_safe_stack
      7.50            +0.1        7.62        perf-profile.self.cycles-pp._copy_to_user
      9.20            +0.1        9.34        perf-profile.self.cycles-pp._copy_from_user
      1.22            +0.1        1.37        perf-profile.self.cycles-pp.x64_sys_call
      6.99            +0.2        7.15        perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack
     49.94            +0.4       50.34        perf-profile.self.cycles-pp.swapcontext
      3.36            +5.2        8.51        perf-profile.self.cycles-pp.do_syscall_64





Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.
diff mbox series

Patch

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index fc61d0205c97084acc89c8e45e088946f5e6d9b2..f94f3fdf15fc0091223cc9f7b823970302e67312 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -14,6 +14,7 @@ 
 #include <linux/kmsan.h>
 
 #include <asm/entry-common.h>
+#include <asm/syscall.h>
 
 /*
  * Define dummy _TIF work flags if not defined by the architecture or for
@@ -366,6 +367,15 @@  static __always_inline void exit_to_user_mode(void)
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
+/**
+ * syscall_exit_work - Handle work before returning to user mode
+ * @regs:	Pointer to current pt_regs
+ * @work:	Current thread syscall work
+ *
+ * Do one-time syscall specific work.
+ */
+void syscall_exit_work(struct pt_regs *regs, unsigned long work);
+
 /**
  * syscall_exit_to_user_mode_work - Handle work before returning to user mode
  * @regs:	Pointer to currents pt_regs
@@ -379,7 +389,30 @@  static __always_inline void exit_to_user_mode(void)
  * make the final state transitions. Interrupts must stay disabled between
  * return from this function and the invocation of exit_to_user_mode().
  */
-void syscall_exit_to_user_mode_work(struct pt_regs *regs);
+static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
+{
+	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
+	unsigned long nr = syscall_get_nr(current, regs);
+
+	CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
+
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
+			local_irq_enable();
+	}
+
+	rseq_syscall(regs);
+
+	/*
+	 * Do one-time syscall specific work. If these work items are
+	 * enabled, we want to run them exactly once per syscall exit with
+	 * interrupts enabled.
+	 */
+	if (unlikely(work & SYSCALL_WORK_EXIT))
+		syscall_exit_work(regs, work);
+	local_irq_disable_exit_to_user();
+	exit_to_user_mode_prepare(regs);
+}
 
 /**
  * syscall_exit_to_user_mode - Handle work before returning to user mode
@@ -410,7 +443,13 @@  void syscall_exit_to_user_mode_work(struct pt_regs *regs);
  * exit_to_user_mode(). This function is preferred unless there is a
  * compelling architectural reason to use the separate functions.
  */
-void syscall_exit_to_user_mode(struct pt_regs *regs);
+static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
+{
+	instrumentation_begin();
+	syscall_exit_to_user_mode_work(regs);
+	instrumentation_end();
+	exit_to_user_mode();
+}
 
 /**
  * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index e33691d5adf7aab4af54cf2bf8e5ef5bd6ad1424..f55e421fb196dd5f9d4e34dd85ae096c774cf879 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -146,7 +146,7 @@  static inline bool report_single_step(unsigned long work)
 	return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
 }
 
-static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
+void syscall_exit_work(struct pt_regs *regs, unsigned long work)
 {
 	bool step;
 
@@ -173,53 +173,6 @@  static void syscall_exit_work(struct pt_regs *regs, unsigned long work)
 		ptrace_report_syscall_exit(regs, step);
 }
 
-/*
- * Syscall specific exit to user mode preparation. Runs with interrupts
- * enabled.
- */
-static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
-{
-	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
-	unsigned long nr = syscall_get_nr(current, regs);
-
-	CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
-
-	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
-		if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
-			local_irq_enable();
-	}
-
-	rseq_syscall(regs);
-
-	/*
-	 * Do one-time syscall specific work. If these work items are
-	 * enabled, we want to run them exactly once per syscall exit with
-	 * interrupts enabled.
-	 */
-	if (unlikely(work & SYSCALL_WORK_EXIT))
-		syscall_exit_work(regs, work);
-}
-
-static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
-{
-	syscall_exit_to_user_mode_prepare(regs);
-	local_irq_disable_exit_to_user();
-	exit_to_user_mode_prepare(regs);
-}
-
-void syscall_exit_to_user_mode_work(struct pt_regs *regs)
-{
-	__syscall_exit_to_user_mode_work(regs);
-}
-
-__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
-{
-	instrumentation_begin();
-	__syscall_exit_to_user_mode_work(regs);
-	instrumentation_end();
-	exit_to_user_mode();
-}
-
 noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
 {
 	enter_from_user_mode(regs);