[2/3] x86/asm: Move 'status' from thread_struct to thread_info
diff mbox

Message ID 03148bcc1b217100e6e8ecf6a5468c45cf4304b6.1517164461.git.luto@kernel.org
State New
Headers show

Commit Message

Andy Lutomirski Jan. 28, 2018, 6:38 p.m. UTC
The TS_COMPAT bit is very hot and is accessed from code paths that
mostly also touch thread_info::flags.  Move it into struct
thread_info to improve cache locality.

The only reason it was in thread_struct is that there was a brief
period during which we didn't allow arch-specific fields in struct
thread_info.

Linus suggested further changing:

  ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);

to:

  if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED)))
          ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);

on the theory that frequently dirtying the cacheline even in pure
64-bit code that never needs to modify status hurts performance.
That could be a reasonable followup patch, but I suspect it matters
less on top of this patch.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/common.c            | 4 ++--
 arch/x86/include/asm/processor.h   | 2 --
 arch/x86/include/asm/syscall.h     | 6 +++---
 arch/x86/include/asm/thread_info.h | 3 ++-
 arch/x86/kernel/process_64.c       | 4 ++--
 arch/x86/kernel/ptrace.c           | 2 +-
 arch/x86/kernel/signal.c           | 2 +-
 7 files changed, 11 insertions(+), 12 deletions(-)

Comments

Ingo Molnar Jan. 28, 2018, 7:02 p.m. UTC | #1
* Andy Lutomirski <luto@kernel.org> wrote:

> The TS_COMPAT bit is very hot and is accessed from code paths that
> mostly also touch thread_info::flags.  Move it into struct
> thread_info to improve cache locality.
> 
> The only reason it was in thread_struct is that there was a brief
> period during which we didn't allow arch-specific fields in struct
> thread_info.
> 
> Linus suggested further changing:
> 
>   ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
> 
> to:
> 
>   if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED)))
>           ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
> 
> on the theory that frequently dirtying the cacheline even in pure
> 64-bit code that never needs to modify status hurts performance.
> That could be a reasonable followup patch, but I suspect it matters
> less on top of this patch.
> 
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/entry/common.c            | 4 ++--
>  arch/x86/include/asm/processor.h   | 2 --
>  arch/x86/include/asm/syscall.h     | 6 +++---
>  arch/x86/include/asm/thread_info.h | 3 ++-
>  arch/x86/kernel/process_64.c       | 4 ++--
>  arch/x86/kernel/ptrace.c           | 2 +-
>  arch/x86/kernel/signal.c           | 2 +-
>  7 files changed, 11 insertions(+), 12 deletions(-)

Reviewed-by: Ingo Molnar <mingo@kernel.org>

Thanks,

	Ingo
Linus Torvalds Jan. 28, 2018, 7:19 p.m. UTC | #2
On Sun, Jan 28, 2018 at 10:38 AM, Andy Lutomirski <luto@kernel.org> wrote:
>
> Linus suggested further changing:
>
>   ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
>
> to:
>
>   if (unlikely(ti->status & (TS_COMPAT|TS_I386_REGS_POKED)))
>           ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
>
> on the theory that frequently dirtying the cacheline even in pure
> 64-bit code that never needs to modify status hurts performance.
> That could be a reasonable followup patch, but I suspect it matters
> less on top of this patch.

Ack, that should be done separately from the movement anyway.

And yes, it's possible that once it's in the same cacheline with the
thread flags, you can't even see the issue anyway. Although I *think*
all those early fields are normally mostly read-only, so that "read
before clear" may end up being a good idea regardless.

                Linus

Patch
diff mbox

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index d7d3cc24baf4..99081340d19a 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -206,7 +206,7 @@  __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	 * special case only applies after poking regs and before the
 	 * very next return to user mode.
 	 */
-	current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
 #endif
 
 	user_enter_irqoff();
@@ -304,7 +304,7 @@  static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 	unsigned int nr = (unsigned int)regs->orig_ax;
 
 #ifdef CONFIG_IA32_EMULATION
-	current->thread.status |= TS_COMPAT;
+	ti->status |= TS_COMPAT;
 #endif
 
 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d3a67fba200a..99799fbd0f7e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -460,8 +460,6 @@  struct thread_struct {
 	unsigned short		gsindex;
 #endif
 
-	u32			status;		/* thread synchronous flags */
-
 #ifdef CONFIG_X86_64
 	unsigned long		fsbase;
 	unsigned long		gsbase;
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index e3c95e8e61c5..03eedc21246d 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@  static inline long syscall_get_error(struct task_struct *task,
 	 * TS_COMPAT is set for 32-bit syscall entries and then
 	 * remains set until we return to user mode.
 	 */
-	if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+	if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
 		/*
 		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
 		 * and will match correctly in comparisons.
@@ -116,7 +116,7 @@  static inline void syscall_get_arguments(struct task_struct *task,
 					 unsigned long *args)
 {
 # ifdef CONFIG_IA32_EMULATION
-	if (task->thread.status & TS_COMPAT)
+	if (task->thread_info.status & TS_COMPAT)
 		switch (i) {
 		case 0:
 			if (!n--) break;
@@ -177,7 +177,7 @@  static inline void syscall_set_arguments(struct task_struct *task,
 					 const unsigned long *args)
 {
 # ifdef CONFIG_IA32_EMULATION
-	if (task->thread.status & TS_COMPAT)
+	if (task->thread_info.status & TS_COMPAT)
 		switch (i) {
 		case 0:
 			if (!n--) break;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 00223333821a..eda3b6823ca4 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -55,6 +55,7 @@  struct task_struct;
 
 struct thread_info {
 	unsigned long		flags;		/* low level flags */
+	u32			status;		/* thread synchronous flags */
 };
 
 #define INIT_THREAD_INFO(tsk)			\
@@ -221,7 +222,7 @@  static inline int arch_within_stack_frames(const void * const stack,
 #define in_ia32_syscall() true
 #else
 #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
-			   current->thread.status & TS_COMPAT)
+			   current_thread_info()->status & TS_COMPAT)
 #endif
 
 /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c75466232016..9eb448c7859d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -557,7 +557,7 @@  static void __set_personality_x32(void)
 	 * Pretend to come from a x32 execve.
 	 */
 	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
-	current->thread.status &= ~TS_COMPAT;
+	current_thread_info()->status &= ~TS_COMPAT;
 #endif
 }
 
@@ -571,7 +571,7 @@  static void __set_personality_ia32(void)
 	current->personality |= force_personality32;
 	/* Prepare the first "return" to user space */
 	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
-	current->thread.status |= TS_COMPAT;
+	current_thread_info()->status |= TS_COMPAT;
 #endif
 }
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f37d18124648..ed5c4cdf0a34 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -935,7 +935,7 @@  static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 		 */
 		regs->orig_ax = value;
 		if (syscall_get_nr(child, regs) >= 0)
-			child->thread.status |= TS_I386_REGS_POKED;
+			child->thread_info.status |= TS_I386_REGS_POKED;
 		break;
 
 	case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b9e00e8f1c9b..4cdc0b27ec82 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -787,7 +787,7 @@  static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 	 * than the tracee.
 	 */
 #ifdef CONFIG_IA32_EMULATION
-	if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI