[RFC,7/8] arm64/sve: Don't disable SVE on syscalls return

Message ID 20190118164610.8123-8-julien.grall@arm.com (mailing list archive)
State RFC
Series arm64/sve: First steps towards optimizing syscalls

Commit Message

Julien Grall Jan. 18, 2019, 4:46 p.m. UTC
Per the syscall ABI, the SVE registers are unspecified after a syscall.
In practice the kernel disables SVE and, when the task next executes an
SVE instruction, traps and zeroes all the registers but the first
128 bits of each vector. For a workload mixing SVE and syscalls, this
results in two kernel entries/exits per syscall: the syscall itself,
plus the SVE access trap that follows it.
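
As an illustration, a hypothetical userspace loop of the following shape
(not part of this patch; the intrinsics and build flags are assumptions
taken from the SVE ACLE) currently takes that extra trap on every
iteration:

    /*
     * Hypothetical microbenchmark mixing SVE work with syscalls.
     * Build with an SVE-capable toolchain, e.g. -march=armv8-a+sve.
     */
    #define _GNU_SOURCE
    #include <arm_sve.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    void mix(float *dst, const float *a, const float *b, long iters)
    {
        svbool_t pg = svptrue_b32();

        for (long i = 0; i < iters; i++) {
            /* SVE work: one predicated vector add. */
            svfloat32_t va = svld1_f32(pg, a);
            svfloat32_t vb = svld1_f32(pg, b);

            svst1_f32(pg, dst, svadd_f32_x(pg, va, vb));

            /*
             * A syscall in between: per the ABI, everything but the
             * low 128 bits of each Z-register is unspecified afterwards,
             * so the next SVE instruction traps to re-enable SVE and
             * zero the registers.
             */
            syscall(SYS_getpid);
        }
    }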

To avoid the second entry/exit, a new flag TIF_SVE_NEEDS_FLUSH is
introduced to mark a task that needs to flush the SVE context on
return to userspace.

On entry to a syscall, the flag TIF_SVE will still be cleared. It will
be set again on return to userspace, once the SVE state has been
flushed. This means that if a task needs to synchronize the FP state
during a syscall (e.g. context switch, signal), only the FPSIMD
registers will be saved. When the task is rescheduled, the SVE state
will be loaded back from the FPSIMD state.
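
For reference, a rough pseudo-C sketch of the resulting flow, condensed
from the hunks below (not part of the patch itself):

    /* Syscall entry: sve_user_discard(). */
    if (test_and_clear_thread_flag(TIF_SVE))
        set_thread_flag(TIF_SVE_NEEDS_FLUSH);
    /* Note: sve_user_disable() is no longer called here. */

    /* FP state synchronized mid-syscall: task_fpsimd_load(). */
    if (test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
        /* Rebuild the SVE view from the saved FPSIMD state. */
        sve_load_from_fpsimd_state(&current->thread.uw.fpsimd_state,
                                   sve_vq_from_vl(current->thread.sve_vl) - 1);
        set_thread_flag(TIF_SVE);
    }

    /* Return to userspace: fpsimd_restore_current_state(). */
    if (test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
        sve_flush_live();       /* zero all but the low 128 bits */
        sve_user_enable();
        set_thread_flag(TIF_SVE);
    }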

Signed-off-by: Julien Grall <julien.grall@arm.com>
---
 arch/arm64/include/asm/thread_info.h |  5 ++++-
 arch/arm64/kernel/fpsimd.c           | 32 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/process.c          |  1 +
 arch/arm64/kernel/ptrace.c           |  7 +++++++
 arch/arm64/kernel/signal.c           | 14 +++++++++++++-
 arch/arm64/kernel/syscall.c          | 13 +++++--------
 6 files changed, 62 insertions(+), 10 deletions(-)

Patch

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index bbca68b54732..78a836d61dc1 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -87,6 +87,7 @@  void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_FSCHECK		5	/* Check FS is USER_DS on return */
+#define TIF_SVE_NEEDS_FLUSH	6	/* Flush SVE registers on return */
 #define TIF_NOHZ		7
 #define TIF_SYSCALL_TRACE	8
 #define TIF_SYSCALL_AUDIT	9
@@ -114,10 +115,12 @@  void arch_release_task_struct(struct task_struct *tsk);
 #define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_SVE		(1 << TIF_SVE)
+#define _TIF_SVE_NEEDS_FLUSH	(1 << TIF_SVE_NEEDS_FLUSH)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE | _TIF_FSCHECK)
+				 _TIF_UPROBE | _TIF_FSCHECK | \
+				 _TIF_SVE_NEEDS_FLUSH)
 
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index b3870905a492..ff76e7cc358d 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -148,6 +148,8 @@  extern void __percpu *efi_sve_state;
  */
 static void __sve_free(struct task_struct *task)
 {
+	/* SVE context will be zeroed when allocated. */
+	clear_tsk_thread_flag(task, TIF_SVE_NEEDS_FLUSH);
 	kfree(task->thread.sve_state);
 	task->thread.sve_state = NULL;
 }
@@ -204,6 +206,11 @@  static void sve_free(struct task_struct *task)
  *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
  *    irrespective of whether TIF_SVE is clear or set, since these are
  *    not vector length dependent.
+ *
+ *  * When TIF_SVE_NEEDS_FLUSH is set, all the SVE state except the first
+ *    128 bits of each Z-register is logically zero but not stored anywhere.
+ *    Saving logically zero bits across context switches is therefore
+ *    pointless, although they must be zeroed before re-entering userspace.
  */
 
 /*
@@ -213,6 +220,14 @@  static void sve_free(struct task_struct *task)
  * thread_struct is known to be up to date, when preparing to enter
  * userspace.
  *
+ * When TIF_SVE_NEEDS_FLUSH is set, the SVE state will be restored from the
+ * FPSIMD state.
+ *
+ * TIF_SVE_NEEDS_FLUSH and TIF_SVE should never be set at the same time.
+ * In the unlikely case this happens, the code is able to cope with it: it
+ * will first restore the SVE registers and then flush them in
+ * fpsimd_restore_current_state().
+ *
  * Softirqs (and preemption) must be disabled.
  */
 static void task_fpsimd_load(void)
@@ -223,6 +238,12 @@  static void task_fpsimd_load(void)
 		sve_load_state(sve_pffr(&current->thread),
 			       &current->thread.uw.fpsimd_state.fpsr,
 			       sve_vq_from_vl(current->thread.sve_vl) - 1);
+	else if (system_supports_sve() &&
+		 test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
+		sve_load_from_fpsimd_state(&current->thread.uw.fpsimd_state,
+					   sve_vq_from_vl(current->thread.sve_vl) - 1);
+		set_thread_flag(TIF_SVE);
+	}
 	else
 		fpsimd_load_state(&current->thread.uw.fpsimd_state);
 }
@@ -1014,6 +1035,17 @@  void fpsimd_restore_current_state(void)
 		fpsimd_bind_task_to_cpu();
 	}
 
+	if (system_supports_sve() &&
+	    test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
+		/*
+		 * Userspace had SVE enabled on entry to the kernel and
+		 * requires the state to be flushed.
+		 */
+		sve_flush_live();
+		sve_user_enable();
+		set_thread_flag(TIF_SVE);
+	}
+
 	local_bh_enable();
 }
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a0f985a6ac50..52e27d18cb8f 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -319,6 +319,7 @@  int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	 * and discard SVE state for p:
 	 */
 	clear_tsk_thread_flag(p, TIF_SVE);
+	clear_tsk_thread_flag(p, TIF_SVE_NEEDS_FLUSH);
 	p->thread.sve_state = NULL;
 
 	/*
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 9dce33b0e260..20099c0604be 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -899,6 +899,11 @@  static int sve_set(struct task_struct *target,
 		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
 				SVE_PT_FPSIMD_OFFSET);
 		clear_tsk_thread_flag(target, TIF_SVE);
+		/*
+		 * If ptrace requested to use FPSIMD, then don't try to
+		 * re-enable SVE when the task is running again.
+		 */
+		clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH);
 		goto out;
 	}
 
@@ -923,6 +928,8 @@  static int sve_set(struct task_struct *target,
 	 */
 	fpsimd_sync_to_sve(target);
 	set_tsk_thread_flag(target, TIF_SVE);
+	/* Don't flush SVE registers on return as ptrace will update them. */
+	clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH);
 
 	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
 	start = SVE_PT_SVE_OFFSET;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 11e335f489b0..cf70b196fc82 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -535,6 +535,17 @@  static int restore_sigframe(struct pt_regs *regs,
 		} else {
 			err = restore_fpsimd_context(user.fpsimd);
 		}
+
+		/*
+		 * When the restore succeeds:
+		 *	- for an FPSIMD context, don't re-enable SVE
+		 *	- for an SVE context, don't override what was
+		 *	  just restored
+		 */
+		if (err == 0)
+			clear_thread_flag(TIF_SVE_NEEDS_FLUSH);
+
+
 	}
 
 	return err;
@@ -947,7 +958,8 @@  asmlinkage void do_notify_resume(struct pt_regs *regs,
 				rseq_handle_notify_resume(NULL, regs);
 			}
 
-			if (thread_flags & _TIF_FOREIGN_FPSTATE)
+			if (thread_flags & (_TIF_FOREIGN_FPSTATE |
+					    _TIF_SVE_NEEDS_FLUSH))
 				fpsimd_restore_current_state();
 		}
 
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 5610ac01c1ec..5ae2100fc5e8 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -111,16 +111,13 @@  static inline void sve_user_discard(void)
 	if (!system_supports_sve())
 		return;
 
-	clear_thread_flag(TIF_SVE);
-
 	/*
-	 * task_fpsimd_load() won't be called to update CPACR_EL1 in
-	 * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only
-	 * happens if a context switch or kernel_neon_begin() or context
-	 * modification (sigreturn, ptrace) intervenes.
-	 * So, ensure that CPACR_EL1 is already correct for the fast-path case.
+	 * TIF_SVE is cleared so that a context switch saves only the FPSIMD
+	 * state rather than the full SVE state. The bit will be set again
+	 * when the registers are restored/zeroed on return to userspace.
 	 */
-	sve_user_disable();
+	if (test_and_clear_thread_flag(TIF_SVE))
+		set_thread_flag(TIF_SVE_NEEDS_FLUSH);
 }
 
 asmlinkage void el0_svc_handler(struct pt_regs *regs)