@@ -136,6 +136,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
* thread information flags:
* TIF_USEDFPU - FPU was used by this task this quantum (SMP)
* TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED
+ * or TIF_NOTIFY_IPI
*
* Any bit in the range of 0..15 will cause do_work_pending() to be invoked.
*/
@@ -144,6 +145,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_UPROBE 3 /* breakpointed or singlestepping */
#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */
+#define TIF_NOTIFY_IPI 5 /* pending IPI on TIF_POLLING idle CPU */
#define TIF_USING_IWMMXT 17
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
@@ -164,6 +166,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
+#define _TIF_NOTIFY_IPI (1 << TIF_NOTIFY_IPI)
/* Checks for any syscall work in entry-common.S */
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
Add support for TIF_NOTIFY_IPI on ARM. With TIF_NOTIFY_IPI, a sender
sending an IPI to an idle CPU in TIF_POLLING mode will set the
TIF_NOTIFY_IPI flag in the target's idle task's thread_info to pull the
CPU out of idle, as opposed to setting TIF_NEED_RESCHED previously. This
avoids spurious calls to schedule_idle() in cases where an IPI does not
necessarily wake up a task on the idle CPU.

IPI throughput measured using a modified version of Anton Blanchard's
ipistorm benchmark [1], configured to measure time taken to perform a
fixed number of smp_call_function_single() calls (with wait set to 1),
improves significantly with TIF_NOTIFY_IPI on a dual socket Ampere
Server (2 x 64C) with the benchmark time reducing to less than half for
100000 IPIs between two CPUs. (Note: Only WFI idle mode was left enabled
during testing to reduce variance)

cmdline: insmod ipistorm.ko numipi=100000 single=1 offset=8 cpulist=8 wait=1

==================================================================
Test          : ipistorm (modified)
Units         : Normalized runtime
Interpretation: Lower is better
Statistic     : AMean
==================================================================
kernel:                                 time [pct imp]
tip:sched/core                          1.00 [0.00]
tip:sched/core + TIF_NOTIFY_IPI         0.41 [59.29]

tip:sched/core was at tag "sched-core-2024-01-08" at the time of
testing.

Cc: Russell King <linux@armlinux.org.uk>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Link: https://github.com/antonblanchard/ipistorm [1]
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
 arch/arm/include/asm/thread_info.h | 3 +++
 1 file changed, 3 insertions(+)