@@ -5,51 +5,26 @@
#
# Architecture requirements
#
-# * arm/arm64/powerpc
#
-# Rely on implicit context synchronization as a result of exception return
-# when returning from IPI handler, and when returning to user-space.
-#
-# * x86
-#
-# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
-# However, it uses both IRET and SYSEXIT to go back to user-space. The IRET
-# instruction is core serializing, but not SYSEXIT.
-#
-# x86-64 uses IRET as return from interrupt, which takes care of the IPI.
-# However, it can return to user-space through either SYSRETL (compat code),
-# SYSRETQ, or IRET.
-#
-# Given that neither SYSRET{L,Q}, nor SYSEXIT, are core serializing, we rely
-# instead on write_cr3() performed by switch_mm() to provide core serialization
-# after changing the current mm, and deal with the special case of kthread ->
-# uthread (temporarily keeping current mm into active_mm) by issuing a
-# sync_core_before_usermode() in that specific case.
-#
- -----------------------
- | arch |status|
- -----------------------
- | alpha: | TODO |
- | arc: | TODO |
- | arm: | ok |
- | arm64: | ok |
- | csky: | TODO |
- | h8300: | TODO |
- | hexagon: | TODO |
- | ia64: | TODO |
- | m68k: | TODO |
- | microblaze: | TODO |
- | mips: | TODO |
- | nds32: | TODO |
- | nios2: | TODO |
- | openrisc: | TODO |
- | parisc: | TODO |
- | powerpc: | ok |
- | riscv: | TODO |
- | s390: | TODO |
- | sh: | TODO |
- | sparc: | TODO |
- | um: | TODO |
- | x86: | ok |
- | xtensa: | TODO |
- -----------------------
+# An architecture that wants to support
+# MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE needs to define precisely what
+# SYNC_CORE is supposed to do on that architecture and implement
+# membarrier_sync_core_before_usermode() to make it do that. Then it can
+# select ARCH_HAS_MEMBARRIER_SYNC_CORE via Kconfig and document what
+# SYNC_CORE does on that architecture in this list.
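+#
+# A sketch of the wiring this implies (mirroring the headers added in this
+# patch): the architecture's Kconfig entry gains
+#
+#     select ARCH_HAS_MEMBARRIER_SYNC_CORE
+#
+# and its asm/membarrier.h provides membarrier_sync_core_before_usermode().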
+#
+# On x86, a program can safely modify code, issue
+# MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, and then execute that code, via
+# the modified address or an alias, from any thread in the calling process.
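+#
+# As an illustrative sketch only (code_page, new_insns, and len are
+# placeholders), a user-space JIT on x86 that has already registered via
+# MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE might do, using
+# syscall(2) and the constants from <linux/membarrier.h>:
+#
+#     memcpy(code_page, new_insns, len);      /* modify code */
+#     syscall(__NR_membarrier,
+#             MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0);
+#     ((void (*)(void))code_page)();          /* any thread may now run it */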
+#
+# On arm and arm64, a program can modify code, flush the icache as needed,
+# and issue MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE to force a "context
+# synchronizing event", aka pipeline flush, on all CPUs that might run the
+# calling process. Then the program can execute the modified code as long
+# as it is executed from an address consistent with the icache flush and
+# the CPU's cache type. On arm, cacheflush(2) can be used for the icache
+# flushing operation.
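+#
+# A minimal sketch of the same pattern on arm64 (again illustrative only,
+# with code_page, new_insns, and len as placeholders) adds the icache
+# flush, here via the __builtin___clear_cache() compiler builtin:
+#
+#     memcpy(code_page, new_insns, len);      /* modify code */
+#     __builtin___clear_cache((char *)code_page, (char *)code_page + len);
+#     syscall(__NR_membarrier,
+#             MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0);
+#     ((void (*)(void))code_page)();          /* run it from that address */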
+#
+# On powerpc, a program can use MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
+# similarly to arm64. It would be nice if the powerpc maintainers could
+# add a clearer explanation.
new file mode 100644
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM_MEMBARRIER_H
+#define _ASM_ARM_MEMBARRIER_H
+
+#include <asm/barrier.h>
+
+/*
+ * On arm, anyone trying to use membarrier() to handle JIT code is required
+ * to first flush the icache (most likely by using cacheflush(2)) and then
+ * do SYNC_CORE. All that's needed after the icache flush is to execute a
+ * "context synchronization event".
+ *
+ * Returning to user mode is a context synchronization event, so no
+ * specific action by the kernel is needed other than ensuring that the
+ * kernel is entered.
+ */
+static inline void membarrier_sync_core_before_usermode(void)
+{
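+	/* Intentionally empty: returning to user mode is itself the
+	 * required context synchronization event. */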
+}
+
+#endif /* _ASM_ARM_MEMBARRIER_H */
new file mode 100644
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_MEMBARRIER_H
+#define _ASM_ARM64_MEMBARRIER_H
+
+#include <asm/barrier.h>
+
+/*
+ * On arm64, anyone trying to use membarrier() to handle JIT code is
+ * required to first flush the icache and then do SYNC_CORE. All that's
+ * needed after the icache flush is to execute a "context synchronization
+ * event". Right now, ERET does this, and we are guaranteed to ERET before
+ * any user code runs. If Linux ever programs the CPU to make ERET stop
+ * being a context synchronizing event, then this will need to be adjusted.
+ */
+static inline void membarrier_sync_core_before_usermode(void)
+{
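+	/* Intentionally empty: the ERET back to user code is the
+	 * context synchronizing event. */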
+}
+
+#endif /* _ASM_ARM64_MEMBARRIER_H */
@@ -1,4 +1,14 @@
#ifndef _ASM_POWERPC_MEMBARRIER_H
#define _ASM_POWERPC_MEMBARRIER_H
+#include <asm/barrier.h>
+
+/*
+ * The RFI family of instructions are context synchronising, and
+ * that is how we return to userspace, so nothing is required here.
+ */
+static inline void membarrier_sync_core_before_usermode(void)
+{
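+	/* Intentionally empty: RFI on return to userspace is context
+	 * synchronising. */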
+}
+
#endif /* _ASM_POWERPC_MEMBARRIER_H */
@@ -90,7 +90,6 @@ config X86
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
- select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_DEBUG_WX
new file mode 100644
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_MEMBARRIER_H
+#define _ASM_X86_MEMBARRIER_H
+
+#include <asm/sync_core.h>
+
+/*
+ * Ensure that the CPU notices any instruction changes before the next time
+ * it returns to usermode.
+ */
+static inline void membarrier_sync_core_before_usermode(void)
+{
+ /* With PTI, we unconditionally serialize before running user code. */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ /*
+ * Even if we're in an interrupt, we might reschedule before returning,
+ * in which case we could switch to a different thread in the same mm
+ * and return using SYSRET or SYSEXIT. Instead of trying to keep
+ * track of our need to sync the core, just sync right away.
+ */
+ sync_core();
+}
+
+#endif /* _ASM_X86_MEMBARRIER_H */
@@ -88,24 +88,4 @@ static inline void sync_core(void)
iret_to_self();
}
-/*
- * Ensure that a core serializing instruction is issued before returning
- * to user-mode. x86 implements return to user-space through sysexit,
- * sysrel, and sysretq, which are not core serializing.
- */
-static inline void sync_core_before_usermode(void)
-{
- /* With PTI, we unconditionally serialize before running user code. */
- if (static_cpu_has(X86_FEATURE_PTI))
- return;
-
- /*
- * Even if we're in an interrupt, we might reschedule before returning,
- * in which case we could switch to a different thread in the same mm
- * and return using SYSRET or SYSEXIT. Instead of trying to keep
- * track of our need to sync the core, just sync right away.
- */
- sync_core();
-}
-
#endif /* _ASM_X86_SYNC_CORE_H */
@@ -17,7 +17,7 @@
#include <linux/kprobes.h>
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
-#include <linux/sync_core.h>
+#include <asm/sync_core.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -41,12 +41,12 @@
#include <linux/irq_work.h>
#include <linux/export.h>
#include <linux/set_memory.h>
-#include <linux/sync_core.h>
#include <linux/task_work.h>
#include <linux/hardirq.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
+#include <asm/sync_core.h>
#include <asm/traps.h>
#include <asm/tlbflush.h>
#include <asm/mce.h>
@@ -12,6 +12,7 @@
#include <linux/sched/mm.h>
#include <asm/tlbflush.h>
+#include <asm/membarrier.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
@@ -491,7 +492,7 @@ static void sync_core_if_membarrier_enabled(struct mm_struct *next)
#ifdef CONFIG_MEMBARRIER
if (unlikely(atomic_read(&next->membarrier_state) &
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE))
- sync_core_before_usermode();
+ membarrier_sync_core_before_usermode();
#endif
}
@@ -20,8 +20,8 @@
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/security.h>
-#include <linux/sync_core.h>
#include <linux/prefetch.h>
+#include <asm/sync_core.h>
#include "gru.h"
#include "grutables.h"
#include "grulib.h"
@@ -16,7 +16,7 @@
#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq)
#else
-#include <linux/sync_core.h>
+#include <asm/sync_core.h>
#include <asm/tsc.h>
#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz)
@@ -16,10 +16,10 @@
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
-#include <linux/sync_core.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/export.h>
+#include <asm/sync_core.h>
#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
@@ -7,7 +7,6 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>
-#include <linux/sync_core.h>
/*
* Routines for handling mm_structs
deleted file mode 100644
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_SYNC_CORE_H
-#define _LINUX_SYNC_CORE_H
-
-#ifdef CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
-#include <asm/sync_core.h>
-#else
-/*
- * This is a dummy sync_core_before_usermode() implementation that can be used
- * on all architectures which return to user-space through core serializing
- * instructions.
- * If your architecture returns to user-space through non-core-serializing
- * instructions, you need to write your own functions.
- */
-static inline void sync_core_before_usermode(void)
-{
-}
-#endif
-
-#endif /* _LINUX_SYNC_CORE_H */
-
@@ -2364,9 +2364,6 @@ source "kernel/Kconfig.locks"
config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
bool
-config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
- bool
-
# It may be useful for an architecture to override the definitions of the
# SYSCALL_DEFINE() and __SYSCALL_DEFINEx() macros in <linux/syscalls.h>
# and the COMPAT_ variants in <linux/compat.h>, in particular to use a
@@ -5,6 +5,14 @@
* membarrier system call
*/
#include "sched.h"
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
+#include <asm/membarrier.h>
+#else
+static inline void membarrier_sync_core_before_usermode(void)
+{
+ compiletime_assert(0, "architecture does not implement membarrier_sync_core_before_usermode");
+}
+#endif
/*
* The basic principle behind the regular memory barrier mode of
@@ -231,12 +239,12 @@ static void ipi_sync_core(void *info)
* the big comment at the top of this file.
*
* A sync_core() would provide this guarantee, but
- * sync_core_before_usermode() might end up being deferred until
- * after membarrier()'s smp_mb().
+ * membarrier_sync_core_before_usermode() might end up being deferred
+ * until after membarrier()'s smp_mb().
*/
smp_mb(); /* IPIs should be serializing but paranoid. */
- sync_core_before_usermode();
+ membarrier_sync_core_before_usermode();
}
static void ipi_rseq(void *info)