diff mbox

[3/3] x86_64,entry: Remove the syscall exit audit and schedule optimizations

Message ID 1072a16a8d4ad1b11b8062f76e3236b9771b0fb6.1415403984.git.luto@amacapital.net (mailing list archive)
State New, archived
Headers show

Commit Message

Andy Lutomirski Nov. 7, 2014, 11:58 p.m. UTC
We used to optimize rescheduling and audit on syscall exit.  Now that
the full slow path is reasonably fast, remove these optimizations.

This adds something like 10ns to the previously optimized paths on my
computer, presumably due mostly to SAVE_REST / RESTORE_REST.

I think that we should eventually replace both the syscall and
non-paranoid interrupt exit slow paths with a pair of C functions
along the lines of the syscall entry hooks.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/kernel/entry_64.S | 52 +++++-----------------------------------------
 1 file changed, 5 insertions(+), 47 deletions(-)

Comments

Borislav Petkov Jan. 9, 2015, 3:53 p.m. UTC | #1
On Fri, Nov 07, 2014 at 03:58:19PM -0800, Andy Lutomirski wrote:
> We used to optimize rescheduling and audit on syscall exit.  Now that
> the full slow path is reasonably fast, remove these optimizations.
> 
> This adds something like 10ns to the previously optimized paths on my
> computer, presumably due mostly to SAVE_REST / RESTORE_REST.
> 
> I think that we should eventually replace both the syscall and
> non-paranoid interrupt exit slow paths with a pair of C functions
> along the lines of the syscall entry hooks.
> 
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> ---
>  arch/x86/kernel/entry_64.S | 52 +++++-----------------------------------------
>  1 file changed, 5 insertions(+), 47 deletions(-)
> 
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index a5afdf0f7fa4..222dc5c45ac3 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -427,15 +427,12 @@ system_call_fastpath:
>   * Has incomplete stack frame and undefined top of stack.
>   */
>  ret_from_sys_call:
> -	movl $_TIF_ALLWORK_MASK,%edi
> -	/* edi:	flagmask */
> -sysret_check:
> +	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
> +	jnz int_ret_from_sys_call_fixup	/* Go the the slow path */
> +
>  	LOCKDEP_SYS_EXIT
>  	DISABLE_INTERRUPTS(CLBR_NONE)
>  	TRACE_IRQS_OFF
> -	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
> -	andl %edi,%edx
> -	jnz  sysret_careful
>  	CFI_REMEMBER_STATE
>  	/*
>  	 * sysretq will re-enable interrupts:
> @@ -449,49 +446,10 @@ sysret_check:
>  	USERGS_SYSRET64
>  
>  	CFI_RESTORE_STATE
> -	/* Handle reschedules */
> -	/* edx:	work, edi: workmask */
> -sysret_careful:
> -	bt $TIF_NEED_RESCHED,%edx
> -	jnc sysret_signal
> -	TRACE_IRQS_ON
> -	ENABLE_INTERRUPTS(CLBR_NONE)
> -	pushq_cfi %rdi
> -	SCHEDULE_USER
> -	popq_cfi %rdi
> -	jmp sysret_check
>  
> -	/* Handle a signal */
> -sysret_signal:
> -	TRACE_IRQS_ON
> -	ENABLE_INTERRUPTS(CLBR_NONE)
> -#ifdef CONFIG_AUDITSYSCALL
> -	bt $TIF_SYSCALL_AUDIT,%edx
> -	jc sysret_audit
> -#endif
> -	/*
> -	 * We have a signal, or exit tracing or single-step.
> -	 * These all wind up with the iret return path anyway,
> -	 * so just join that path right now.
> -	 */
> +int_ret_from_sys_call_fixup:
>  	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
> -	jmp int_check_syscall_exit_work
> -
> -#ifdef CONFIG_AUDITSYSCALL
> -	/*
> -	 * Return fast path for syscall audit.  Call __audit_syscall_exit()
> -	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
> -	 * masked off.
> -	 */
> -sysret_audit:
> -	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
> -	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO? */
> -	setbe %al		/* 1 if so, 0 if not */
> -	movzbl %al,%edi		/* zero-extend that into %edi */
> -	call __audit_syscall_exit
> -	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
> -	jmp sysret_check
> -#endif	/* CONFIG_AUDITSYSCALL */
> +	jmp int_ret_from_sys_call

Ok, dumb question: what is replacing that audit functionality now?
Or are we remaining with the single path down syscall_trace_leave()?
AFAICT, the intention is to direct everything to int_ret_from_sys_call
after the syscall has been done so that paths can get simplified. Which
is a good thing, AFAICT.

Acked-by: Borislav Petkov <bp@suse.de>
Andy Lutomirski Jan. 9, 2015, 4:08 p.m. UTC | #2
On Fri, Jan 9, 2015 at 7:53 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Fri, Nov 07, 2014 at 03:58:19PM -0800, Andy Lutomirski wrote:
>> We used to optimize rescheduling and audit on syscall exit.  Now that
>> the full slow path is reasonably fast, remove these optimizations.
>>
>> This adds something like 10ns to the previously optimized paths on my
>> computer, presumably due mostly to SAVE_REST / RESTORE_REST.
>>
>> I think that we should eventually replace both the syscall and
>> non-paranoid interrupt exit slow paths with a pair of C functions
>> along the lines of the syscall entry hooks.
>>
>> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
>> ---
>>  arch/x86/kernel/entry_64.S | 52 +++++-----------------------------------------
>>  1 file changed, 5 insertions(+), 47 deletions(-)
>>
>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>> index a5afdf0f7fa4..222dc5c45ac3 100644
>> --- a/arch/x86/kernel/entry_64.S
>> +++ b/arch/x86/kernel/entry_64.S
>> @@ -427,15 +427,12 @@ system_call_fastpath:
>>   * Has incomplete stack frame and undefined top of stack.
>>   */
>>  ret_from_sys_call:
>> -     movl $_TIF_ALLWORK_MASK,%edi
>> -     /* edi: flagmask */
>> -sysret_check:
>> +     testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
>> +     jnz int_ret_from_sys_call_fixup /* Go the the slow path */
>> +
>>       LOCKDEP_SYS_EXIT
>>       DISABLE_INTERRUPTS(CLBR_NONE)
>>       TRACE_IRQS_OFF
>> -     movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
>> -     andl %edi,%edx
>> -     jnz  sysret_careful
>>       CFI_REMEMBER_STATE
>>       /*
>>        * sysretq will re-enable interrupts:
>> @@ -449,49 +446,10 @@ sysret_check:
>>       USERGS_SYSRET64
>>
>>       CFI_RESTORE_STATE
>> -     /* Handle reschedules */
>> -     /* edx: work, edi: workmask */
>> -sysret_careful:
>> -     bt $TIF_NEED_RESCHED,%edx
>> -     jnc sysret_signal
>> -     TRACE_IRQS_ON
>> -     ENABLE_INTERRUPTS(CLBR_NONE)
>> -     pushq_cfi %rdi
>> -     SCHEDULE_USER
>> -     popq_cfi %rdi
>> -     jmp sysret_check
>>
>> -     /* Handle a signal */
>> -sysret_signal:
>> -     TRACE_IRQS_ON
>> -     ENABLE_INTERRUPTS(CLBR_NONE)
>> -#ifdef CONFIG_AUDITSYSCALL
>> -     bt $TIF_SYSCALL_AUDIT,%edx
>> -     jc sysret_audit
>> -#endif
>> -     /*
>> -      * We have a signal, or exit tracing or single-step.
>> -      * These all wind up with the iret return path anyway,
>> -      * so just join that path right now.
>> -      */
>> +int_ret_from_sys_call_fixup:
>>       FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
>> -     jmp int_check_syscall_exit_work
>> -
>> -#ifdef CONFIG_AUDITSYSCALL
>> -     /*
>> -      * Return fast path for syscall audit.  Call __audit_syscall_exit()
>> -      * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
>> -      * masked off.
>> -      */
>> -sysret_audit:
>> -     movq RAX-ARGOFFSET(%rsp),%rsi   /* second arg, syscall return value */
>> -     cmpq $-MAX_ERRNO,%rsi   /* is it < -MAX_ERRNO? */
>> -     setbe %al               /* 1 if so, 0 if not */
>> -     movzbl %al,%edi         /* zero-extend that into %edi */
>> -     call __audit_syscall_exit
>> -     movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
>> -     jmp sysret_check
>> -#endif       /* CONFIG_AUDITSYSCALL */
>> +     jmp int_ret_from_sys_call
>
> Ok, dumb question: what is replacing that audit functionality now?
> Or are we remaining with the single path down syscall_trace_leave()?
> AFAICT, the intention is to direct everything to int_ret_from_sys_call
> after the syscall has been done so that paths can get simplified. Which
> is a good thing, AFAICT.

Exactly.  int_ret_from_sys_call calls into the audit code in
syscall_trace_leave.  IIRC the current code sometimes calls the audit
exit hook twice, which seems like a bug to me.

--Andy

>
> Acked-by: Borislav Petkov <bp@suse.de>
>
> --
> Regards/Gruss,
>     Boris.
>
> Sent from a fat crate under my desk. Formatting is fine.
> --
diff mbox

Patch

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a5afdf0f7fa4..222dc5c45ac3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -427,15 +427,12 @@  system_call_fastpath:
  * Has incomplete stack frame and undefined top of stack.
  */
 ret_from_sys_call:
-	movl $_TIF_ALLWORK_MASK,%edi
-	/* edi:	flagmask */
-sysret_check:
+	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	jnz int_ret_from_sys_call_fixup	/* Go the the slow path */
+
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
-	andl %edi,%edx
-	jnz  sysret_careful
 	CFI_REMEMBER_STATE
 	/*
 	 * sysretq will re-enable interrupts:
@@ -449,49 +446,10 @@  sysret_check:
 	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
-	/* Handle reschedules */
-	/* edx:	work, edi: workmask */
-sysret_careful:
-	bt $TIF_NEED_RESCHED,%edx
-	jnc sysret_signal
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq_cfi %rdi
-	SCHEDULE_USER
-	popq_cfi %rdi
-	jmp sysret_check
 
-	/* Handle a signal */
-sysret_signal:
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-#ifdef CONFIG_AUDITSYSCALL
-	bt $TIF_SYSCALL_AUDIT,%edx
-	jc sysret_audit
-#endif
-	/*
-	 * We have a signal, or exit tracing or single-step.
-	 * These all wind up with the iret return path anyway,
-	 * so just join that path right now.
-	 */
+int_ret_from_sys_call_fixup:
 	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-	jmp int_check_syscall_exit_work
-
-#ifdef CONFIG_AUDITSYSCALL
-	/*
-	 * Return fast path for syscall audit.  Call __audit_syscall_exit()
-	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
-	 * masked off.
-	 */
-sysret_audit:
-	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
-	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO? */
-	setbe %al		/* 1 if so, 0 if not */
-	movzbl %al,%edi		/* zero-extend that into %edi */
-	call __audit_syscall_exit
-	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
-	jmp sysret_check
-#endif	/* CONFIG_AUDITSYSCALL */
+	jmp int_ret_from_sys_call
 
 	/* Do syscall tracing */
 tracesys: