diff mbox series

[RFC,10/15] x86/entry: Move irq tracing to C code

Message ID 20190919150809.446771597@linutronix.de (mailing list archive)
State New, archived
Headers show
Series entry: Provide generic implementation for host and guest entry/exit work | expand

Commit Message

Thomas Gleixner Sept. 19, 2019, 3:03 p.m. UTC
To prepare for converting the exit to usermode code to the generic version,
move the irqflags tracing into C code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/entry/common.c          |   10 ++++++++++
 arch/x86/entry/entry_32.S        |   11 +----------
 arch/x86/entry/entry_64.S        |   10 ++--------
 arch/x86/entry/entry_64_compat.S |   21 ---------------------
 4 files changed, 13 insertions(+), 39 deletions(-)

Comments

Peter Zijlstra Sept. 23, 2019, 8:47 a.m. UTC | #1
On Thu, Sep 19, 2019 at 05:03:24PM +0200, Thomas Gleixner wrote:
> To prepare for converting the exit to usermode code to the generic version,
> move the irqflags tracing into C code.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  arch/x86/entry/common.c          |   10 ++++++++++
>  arch/x86/entry/entry_32.S        |   11 +----------
>  arch/x86/entry/entry_64.S        |   10 ++--------
>  arch/x86/entry/entry_64_compat.S |   21 ---------------------
>  4 files changed, 13 insertions(+), 39 deletions(-)
> 
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -102,6 +102,8 @@ static void exit_to_usermode_loop(struct
>  	struct thread_info *ti = current_thread_info();
>  	u32 cached_flags;
>  
> +	trace_hardirqs_off();

Bah.. so this gets called from:

 - C code, with IRQs disabled
 - entry_64.S:error_exit
 - entry_32.S:resume_userspace

The first obviously doesn't need this annotation, but this patch doesn't
remove the TRACE_IRQS_OFF from entry_64.S and only the 32bit case is
changed.

Is that entry_64.S case an oversight, or do we need an extensive comment
on this one?

>  	addr_limit_user_check();
>  
>  	lockdep_assert_irqs_disabled();
> @@ -137,6 +139,8 @@ static void exit_to_usermode_loop(struct
>  	user_enter_irqoff();
>  
>  	mds_user_clear_cpu_buffers();
> +

	/* Return to userspace (IRET) will re-enable interrupts */
> +	trace_hardirqs_on();
>  }
Thomas Gleixner Sept. 23, 2019, 10:27 a.m. UTC | #2
On Mon, 23 Sep 2019, Peter Zijlstra wrote:

> On Thu, Sep 19, 2019 at 05:03:24PM +0200, Thomas Gleixner wrote:
> > To prepare for converting the exit to usermode code to the generic version,
> > move the irqflags tracing into C code.
> > 
> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> > ---
> >  arch/x86/entry/common.c          |   10 ++++++++++
> >  arch/x86/entry/entry_32.S        |   11 +----------
> >  arch/x86/entry/entry_64.S        |   10 ++--------
> >  arch/x86/entry/entry_64_compat.S |   21 ---------------------
> >  4 files changed, 13 insertions(+), 39 deletions(-)
> > 
> > --- a/arch/x86/entry/common.c
> > +++ b/arch/x86/entry/common.c
> > @@ -102,6 +102,8 @@ static void exit_to_usermode_loop(struct
> >  	struct thread_info *ti = current_thread_info();
> >  	u32 cached_flags;
> >  
> > +	trace_hardirqs_off();
> 
> Bah.. so this gets called from:
> 
>  - C code, with IRQs disabled
>  - entry_64.S:error_exit
>  - entry_32.S:resume_userspace
> 
> The first obviously doesn't need this annotation, but this patch doesn't
> remove the TRACE_IRQS_OFF from entry_64.S and only the 32bit case is
> changed.
> 
> Is that entry_64.S case an oversight, or do we need an extensive comment
> on this one?

Lemme stare at that again. At some point I probably lost track in that maze.

Thanks,

	tglx
Peter Zijlstra Sept. 23, 2019, 11:49 a.m. UTC | #3
On Mon, Sep 23, 2019 at 12:27:47PM +0200, Thomas Gleixner wrote:
> On Mon, 23 Sep 2019, Peter Zijlstra wrote:
> 
> > On Thu, Sep 19, 2019 at 05:03:24PM +0200, Thomas Gleixner wrote:
> > > To prepare for converting the exit to usermode code to the generic version,
> > > move the irqflags tracing into C code.
> > > 
> > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> > > ---
> > >  arch/x86/entry/common.c          |   10 ++++++++++
> > >  arch/x86/entry/entry_32.S        |   11 +----------
> > >  arch/x86/entry/entry_64.S        |   10 ++--------
> > >  arch/x86/entry/entry_64_compat.S |   21 ---------------------
> > >  4 files changed, 13 insertions(+), 39 deletions(-)
> > > 
> > > --- a/arch/x86/entry/common.c
> > > +++ b/arch/x86/entry/common.c
> > > @@ -102,6 +102,8 @@ static void exit_to_usermode_loop(struct
> > >  	struct thread_info *ti = current_thread_info();
> > >  	u32 cached_flags;
> > >  
> > > +	trace_hardirqs_off();
> > 
> > Bah.. so this gets called from:
> > 
> >  - C code, with IRQs disabled
> >  - entry_64.S:error_exit
> >  - entry_32.S:resume_userspace
> > 
> > The first obviously doesn't need this annotation, but this patch doesn't
> > remove the TRACE_IRQS_OFF from entry_64.S and only the 32bit case is
> > changed.
> > 
> > Is that entry_64.S case an oversight, or do we need an extensive comment
> > on this one?
> 
> Lemme stare at that again. At some point I probably lost track in that maze.

While walking the kids to school I wondered WTH we need to call
TRACE_IRQS_OFF in the first place. If this is the return from exception
path, interrupts had better be disabled already (in exception enter).

For entry_64.S we have:

  - idtentry_part; which does TRACE_IRQS_OFF at the start and error_exit
    at the end.

  - xen_do_hypervisor_callback, xen_failsafe_callback -- which are
    confusing.

So in the normal case, it appears we can simply do:

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b7c3ea4cb19d..e9cf59ac554e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1368,8 +1368,6 @@ END(error_entry)
 
 ENTRY(error_exit)
 	UNWIND_HINT_REGS
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
 	jmp	retint_user

and all should be well. This leaves Xen...

For entry_32.S it looks like nothing uses 'resume_userspace' so that
ENTRY can go away. Which then leaves:

 * ret_from_intr:
  - common_spurious: TRACE_IRQS_OFF
  - common_interrupt: TRACE_IRQS_OFF
  - BUILD_INTERRUPT3: TRACE_IRQS_OFF
  - xen_do_upcall: ...

 * ret_from_exception:
  - xen_failsafe_callback: ...
  - common_exception_read_cr2: TRACE_IRQS_OFF
  - common_exception: TRACE_IRQS_OFF
  - int3: TRACE_IRQS_OFF

Which again suggests:

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index f83ca5aa8b77..7a19e7413a8e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -825,9 +825,6 @@ END(ret_from_fork)
 	cmpl	$USER_RPL, %eax
 	jb	restore_all_kernel		# not returning to v8086 or userspace
 
-ENTRY(resume_userspace)
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
 	movl	%esp, %eax
 	call	prepare_exit_to_usermode
 	jmp	restore_all

with the notable exception (oh teh pun!) being Xen... _again_.

With these patchlets on, we'd want prepare_exit_to_usermode() to
validate the IRQ state, but AFAICT it _should_ all just 'work' (famous
last words).

Cc Juergen because Xen
Peter Zijlstra Sept. 23, 2019, 11:55 a.m. UTC | #4
On Mon, Sep 23, 2019 at 01:49:20PM +0200, Peter Zijlstra wrote:
> While walking the kids to school I wondered WTH we need to call
> TRACE_IRQS_OFF in the first place. If this is the return from exception
> path, interrupts had better be disabled already (in exception enter).
> 
> For entry_64.S we have:
> 
>   - idtentry_part; which does TRACE_IRQS_OFF at the start and error_exit
>     at the end.
> 
>   - xen_do_hypervisor_callback, xen_failsafe_callback -- which are
>     confusing.
> 
> So in the normal case, it appears we can simply do:
> 
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index b7c3ea4cb19d..e9cf59ac554e 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -1368,8 +1368,6 @@ END(error_entry)
>  
>  ENTRY(error_exit)
>  	UNWIND_HINT_REGS
> -	DISABLE_INTERRUPTS(CLBR_ANY)
> -	TRACE_IRQS_OFF
>  	testb	$3, CS(%rsp)
>  	jz	retint_kernel
>  	jmp	retint_user
> 
> and all should be well. This leaves Xen...
> 
> For entry_32.S it looks like nothing uses 'resume_userspace' so that
> ENTRY can go away. Which then leaves:
> 
>  * ret_from_intr:
>   - common_spurious: TRACE_IRQS_OFF
>   - common_interrupt: TRACE_IRQS_OFF
>   - BUILD_INTERRUPT3: TRACE_IRQS_OFF
>   - xen_do_upcall: ...
> 
>  * ret_from_exception:
>   - xen_failsafe_callback: ...
>   - common_exception_read_cr2: TRACE_IRQS_OFF
>   - common_exception: TRACE_IRQS_OFF
>   - int3: TRACE_IRQS_OFF
> 
> Which again suggests:
> 
> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> index f83ca5aa8b77..7a19e7413a8e 100644
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -825,9 +825,6 @@ END(ret_from_fork)
>  	cmpl	$USER_RPL, %eax
>  	jb	restore_all_kernel		# not returning to v8086 or userspace
>  
> -ENTRY(resume_userspace)
> -	DISABLE_INTERRUPTS(CLBR_ANY)
> -	TRACE_IRQS_OFF
>  	movl	%esp, %eax
>  	call	prepare_exit_to_usermode
>  	jmp	restore_all
> 
> with the notable exception (oh teh pun!) being Xen... _again_.
> 
> With these patchlets on, we'd want prepare_exit_to_usermode() to
> validate the IRQ state, but AFAICT it _should_ all just 'work' (famous
> last words).
> 
> Cc Juergen because Xen

Arrgh.. faults!! they do local_irq_enable() but never disable them
again. Arguably those functions should be symmetric and restore IF when
they muck with it instead of relying on the exit path fixing things up.
Peter Zijlstra Sept. 23, 2019, 12:10 p.m. UTC | #5
On Mon, Sep 23, 2019 at 01:55:51PM +0200, Peter Zijlstra wrote:
> On Mon, Sep 23, 2019 at 01:49:20PM +0200, Peter Zijlstra wrote:
> > While walking the kids to school I wondered WTH we need to call
> > TRACE_IRQS_OFF in the first place. If this is the return from exception
> > path, interrupts had better be disabled already (in exception enter).
> > 
> > For entry_64.S we have:
> > 
> >   - idtentry_part; which does TRACE_IRQS_OFF at the start and error_exit
> >     at the end.
> > 
> >   - xen_do_hypervisor_callback, xen_failsafe_callback -- which are
> >     confusing.
> > 
> > So in the normal case, it appears we can simply do:
> > 
> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> > index b7c3ea4cb19d..e9cf59ac554e 100644
> > --- a/arch/x86/entry/entry_64.S
> > +++ b/arch/x86/entry/entry_64.S
> > @@ -1368,8 +1368,6 @@ END(error_entry)
> >  
> >  ENTRY(error_exit)
> >  	UNWIND_HINT_REGS
> > -	DISABLE_INTERRUPTS(CLBR_ANY)
> > -	TRACE_IRQS_OFF
> >  	testb	$3, CS(%rsp)
> >  	jz	retint_kernel
> >  	jmp	retint_user
> > 
> > and all should be well. This leaves Xen...
> > 
> > For entry_32.S it looks like nothing uses 'resume_userspace' so that
> > ENTRY can go away. Which then leaves:
> > 
> >  * ret_from_intr:
> >   - common_spurious: TRACE_IRQS_OFF
> >   - common_interrupt: TRACE_IRQS_OFF
> >   - BUILD_INTERRUPT3: TRACE_IRQS_OFF
> >   - xen_do_upcall: ...
> > 
> >  * ret_from_exception:
> >   - xen_failsafe_callback: ...
> >   - common_exception_read_cr2: TRACE_IRQS_OFF
> >   - common_exception: TRACE_IRQS_OFF
> >   - int3: TRACE_IRQS_OFF
> > 
> > Which again suggests:
> > 
> > diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> > index f83ca5aa8b77..7a19e7413a8e 100644
> > --- a/arch/x86/entry/entry_32.S
> > +++ b/arch/x86/entry/entry_32.S
> > @@ -825,9 +825,6 @@ END(ret_from_fork)
> >  	cmpl	$USER_RPL, %eax
> >  	jb	restore_all_kernel		# not returning to v8086 or userspace
> >  
> > -ENTRY(resume_userspace)
> > -	DISABLE_INTERRUPTS(CLBR_ANY)
> > -	TRACE_IRQS_OFF
> >  	movl	%esp, %eax
> >  	call	prepare_exit_to_usermode
> >  	jmp	restore_all
> > 
> > with the notable exception (oh teh pun!) being Xen... _again_.
> > 
> > With these patchlets on, we'd want prepare_exit_to_usermode() to
> > validate the IRQ state, but AFAICT it _should_ all just 'work' (famous
> > last words).
> > 
> > Cc Juergen because Xen
> 
> Arrgh.. faults!! they do local_irq_enable() but never disable them
> again. Arguably those functions should be symmetric and restore IF when
> they muck with it instead of relying on the exit path fixing things up.

---
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index f83ca5aa8b77..7a19e7413a8e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -825,9 +825,6 @@ END(ret_from_fork)
 	cmpl	$USER_RPL, %eax
 	jb	restore_all_kernel		# not returning to v8086 or userspace
 
-ENTRY(resume_userspace)
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
 	movl	%esp, %eax
 	call	prepare_exit_to_usermode
 	jmp	restore_all
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b7c3ea4cb19d..e9cf59ac554e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1368,8 +1368,6 @@ END(error_entry)
 
 ENTRY(error_exit)
 	UNWIND_HINT_REGS
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
 	jmp	retint_user
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4bb0f8447112..663d44c68f67 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -276,6 +276,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
 			NOTIFY_STOP) {
 		cond_local_irq_enable(regs);
 		do_trap(trapnr, signr, str, regs, error_code, sicode, addr);
+		cond_local_irq_disable(regs);
 	}
 }
 
@@ -501,6 +502,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 		die("bounds", regs, error_code);
 	}
 
+	cond_local_irq_disable(regs);
 	return;
 
 exit_trap:
@@ -512,6 +514,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 	 * time..
 	 */
 	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
+	cond_local_irq_disable(regs);
 }
 
 dotraplinkage void
@@ -525,19 +528,19 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	if (static_cpu_has(X86_FEATURE_UMIP)) {
 		if (user_mode(regs) && fixup_umip_exception(regs))
-			return;
+			goto exit_trap;
 	}
 
 	if (v8086_mode(regs)) {
 		local_irq_enable();
 		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-		return;
+		goto exit_trap;
 	}
 
 	tsk = current;
 	if (!user_mode(regs)) {
 		if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
-			return;
+			goto exit_trap;
 
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
@@ -549,12 +552,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
 		 */
 		if (!preemptible() && kprobe_running() &&
 		    kprobe_fault_handler(regs, X86_TRAP_GP))
-			return;
+			goto exit_trap;
 
 		if (notify_die(DIE_GPF, desc, regs, error_code,
 			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
 			die(desc, regs, error_code);
-		return;
+		goto exit_trap;
 	}
 
 	tsk->thread.error_code = error_code;
@@ -563,6 +566,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
 
 	force_sig(SIGSEGV);
+exit_trap:
+	cond_local_irq_disable(regs);
 }
 NOKPROBE_SYMBOL(do_general_protection);
 
@@ -783,9 +788,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 	if (v8086_mode(regs)) {
 		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
 					X86_TRAP_DB);
-		cond_local_irq_disable(regs);
-		debug_stack_usage_dec();
-		goto exit;
+		goto exit_irq;
 	}
 
 	if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
@@ -802,6 +805,8 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
 	si_code = get_si_code(tsk->thread.debugreg6);
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(regs, error_code, si_code);
+
+exit_irq:
 	cond_local_irq_disable(regs);
 	debug_stack_usage_dec();
 
@@ -827,7 +832,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 
 	if (!user_mode(regs)) {
 		if (fixup_exception(regs, trapnr, error_code, 0))
-			return;
+			goto exit_trap;
 
 		task->thread.error_code = error_code;
 		task->thread.trap_nr = trapnr;
@@ -835,7 +840,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 		if (notify_die(DIE_TRAP, str, regs, error_code,
 					trapnr, SIGFPE) != NOTIFY_STOP)
 			die(str, regs, error_code);
-		return;
+		goto exit_trap;
 	}
 
 	/*
@@ -849,10 +854,12 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	si_code = fpu__exception_code(fpu, trapnr);
 	/* Retry when we get spurious exceptions: */
 	if (!si_code)
-		return;
+		goto exit_trap;
 
 	force_sig_fault(SIGFPE, si_code,
 			(void __user *)uprobe_get_trap_addr(regs));
+exit_trap:
+	cond_local_irq_disable(regs);
 }
 
 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
@@ -889,6 +896,8 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 
 		info.regs = regs;
 		math_emulate(&info);
+
+		cond_local_irq_disable(regs);
 		return;
 	}
 #endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9ceacd1156db..501cc36a3d6a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1500,10 +1500,13 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
 		return;
 
 	/* Was the fault on kernel-controlled part of the address space? */
-	if (unlikely(fault_in_kernel_space(address)))
+	if (unlikely(fault_in_kernel_space(address))) {
 		do_kern_addr_fault(regs, hw_error_code, address);
-	else
+	} else {
 		do_user_addr_fault(regs, hw_error_code, address);
+		if (regs->flags & X86_EFLAGS_IF)
+			local_irq_disable();
+	}
 }
 NOKPROBE_SYMBOL(__do_page_fault);
Andy Lutomirski Sept. 23, 2019, 5:24 p.m. UTC | #6
On Mon, Sep 23, 2019 at 5:10 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Sep 23, 2019 at 01:55:51PM +0200, Peter Zijlstra wrote:
> > On Mon, Sep 23, 2019 at 01:49:20PM +0200, Peter Zijlstra wrote:
> > > While walking the kids to school I wondered WTH we need to call
> > > TRACE_IRQS_OFF in the first place. If this is the return from exception
> > > path, interrupts had better be disabled already (in exception enter).
> > >
> > > For entry_64.S we have:
> > >
> > >   - idtentry_part; which does TRACE_IRQS_OFF at the start and error_exit
> > >     at the end.
> > >
> > >   - xen_do_hypervisor_callback, xen_failsafe_callback -- which are
> > >     confusing.
> > >
> > > So in the normal case, it appears we can simply do:
> > >
> > > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> > > index b7c3ea4cb19d..e9cf59ac554e 100644
> > > --- a/arch/x86/entry/entry_64.S
> > > +++ b/arch/x86/entry/entry_64.S
> > > @@ -1368,8 +1368,6 @@ END(error_entry)
> > >
> > >  ENTRY(error_exit)
> > >     UNWIND_HINT_REGS
> > > -   DISABLE_INTERRUPTS(CLBR_ANY)
> > > -   TRACE_IRQS_OFF
> > >     testb   $3, CS(%rsp)
> > >     jz      retint_kernel
> > >     jmp     retint_user
> > >
> > > and all should be well. This leaves Xen...
> > >
> > > For entry_32.S it looks like nothing uses 'resume_userspace' so that
> > > ENTRY can go away. Which then leaves:
> > >
> > >  * ret_from_intr:
> > >   - common_spurious: TRACE_IRQS_OFF
> > >   - common_interrupt: TRACE_IRQS_OFF
> > >   - BUILD_INTERRUPT3: TRACE_IRQS_OFF
> > >   - xen_do_upcall: ...
> > >
> > >  * ret_from_exception:
> > >   - xen_failsafe_callback: ...
> > >   - common_exception_read_cr2: TRACE_IRQS_OFF
> > >   - common_exception: TRACE_IRQS_OFF
> > >   - int3: TRACE_IRQS_OFF
> > >
> > > Which again suggests:
> > >
> > > diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> > > index f83ca5aa8b77..7a19e7413a8e 100644
> > > --- a/arch/x86/entry/entry_32.S
> > > +++ b/arch/x86/entry/entry_32.S
> > > @@ -825,9 +825,6 @@ END(ret_from_fork)
> > >     cmpl    $USER_RPL, %eax
> > >     jb      restore_all_kernel              # not returning to v8086 or userspace
> > >
> > > -ENTRY(resume_userspace)
> > > -   DISABLE_INTERRUPTS(CLBR_ANY)
> > > -   TRACE_IRQS_OFF
> > >     movl    %esp, %eax
> > >     call    prepare_exit_to_usermode
> > >     jmp     restore_all
> > >
> > > with the notable exception (oh teh pun!) being Xen... _again_.
> > >
> > > With these patchlets on, we'd want prepare_exit_to_usermode() to
> > > validate the IRQ state, but AFAICT it _should_ all just 'work' (famous
> > > last words).
> > >
> > > Cc Juergen because Xen
> >
> > Arrgh.. faults!! they do local_irq_enable() but never disable them
> > again. Arguably those functions should be symmetric and restore IF when
> > they muck with it instead of relying on the exit path fixing things up.
>
> ---
> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> index f83ca5aa8b77..7a19e7413a8e 100644
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -825,9 +825,6 @@ END(ret_from_fork)
>         cmpl    $USER_RPL, %eax
>         jb      restore_all_kernel              # not returning to v8086 or userspace

...

Yes, please, but can you add an assertion under CONFIG_DEBUG_ENTRY
that IRQs are actually off?
Josh Poimboeuf Sept. 26, 2019, 2:59 a.m. UTC | #7
On Thu, Sep 19, 2019 at 05:03:24PM +0200, Thomas Gleixner wrote:
>  ENTRY(error_exit)
> -	UNWIND_HINT_REGS
>  	DISABLE_INTERRUPTS(CLBR_ANY)
>  	TRACE_IRQS_OFF
> +	UNWIND_HINT_REGS
>  	testb	$3, CS(%rsp)
>  	jz	retint_kernel
>  	jmp	retint_user

I don't think this UNWIND_HINT_REGS needs to be moved?
diff mbox series

Patch

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -102,6 +102,8 @@  static void exit_to_usermode_loop(struct
 	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
+	trace_hardirqs_off();
+
 	addr_limit_user_check();
 
 	lockdep_assert_irqs_disabled();
@@ -137,6 +139,8 @@  static void exit_to_usermode_loop(struct
 	user_enter_irqoff();
 
 	mds_user_clear_cpu_buffers();
+
+	trace_hardirqs_on();
 }
 
 /*
@@ -154,6 +158,8 @@  static void exit_to_usermode_loop(struct
 #ifdef CONFIG_X86_64
 __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
+	/* User to kernel transition disabled interrupts. */
+	trace_hardirqs_off();
 	enter_from_user_mode();
 	local_irq_enable();
 
@@ -221,6 +227,7 @@  static __always_inline void do_syscall_3
 /* Handles int $0x80 */
 __visible void do_int80_syscall_32(struct pt_regs *regs)
 {
+	trace_hardirqs_off();
 	enter_from_user_mode();
 	local_irq_enable();
 	do_syscall_32_irqs_on(regs);
@@ -237,6 +244,9 @@  static __always_inline void do_syscall_3
 	unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
 		vdso_image_32.sym_int80_landing_pad;
 
+	/* User to kernel transition disabled interrupts. */
+	trace_hardirqs_off();
+
 	/*
 	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
 	 * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -827,7 +827,6 @@  END(ret_from_fork)
 
 ENTRY(resume_userspace)
 	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
 	movl	%esp, %eax
 	call	prepare_exit_to_usermode
 	jmp	restore_all
@@ -1049,12 +1048,6 @@  ENTRY(entry_INT80_32)
 
 	SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1	/* save rest */
 
-	/*
-	 * User mode is traced as though IRQs are on, and the interrupt gate
-	 * turned them off.
-	 */
-	TRACE_IRQS_OFF
-
 	movl	%esp, %eax
 	call	do_int80_syscall_32
 .Lsyscall_32_done:
@@ -1062,11 +1055,8 @@  ENTRY(entry_INT80_32)
 	STACKLEAK_ERASE
 
 restore_all:
-	TRACE_IRQS_IRET
 	SWITCH_TO_ENTRY_STACK
-.Lrestore_all_notrace:
 	CHECK_AND_APPLY_ESPFIX
-.Lrestore_nocheck:
 	/* Switch back to user CR3 */
 	SWITCH_TO_USER_CR3 scratch_reg=%eax
 
@@ -1086,6 +1076,7 @@  ENTRY(entry_INT80_32)
 restore_all_kernel:
 #ifdef CONFIG_PREEMPTION
 	DISABLE_INTERRUPTS(CLBR_ANY)
+	TRACE_IRQS_OFF
 	cmpl	$0, PER_CPU_VAR(__preempt_count)
 	jnz	.Lno_preempt
 	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -167,15 +167,11 @@  GLOBAL(entry_SYSCALL_64_after_hwframe)
 
 	PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 
-	TRACE_IRQS_OFF
-
 	/* IRQs are off. */
 	movq	%rax, %rdi
 	movq	%rsp, %rsi
 	call	do_syscall_64		/* returns with IRQs disabled */
 
-	TRACE_IRQS_IRETQ		/* we're about to change IF */
-
 	/*
 	 * Try to use SYSRET instead of IRET if we're returning to
 	 * a completely clean 64-bit userspace context.  If we're not,
@@ -342,7 +338,6 @@  ENTRY(ret_from_fork)
 	UNWIND_HINT_REGS
 	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
-	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
 	jmp	swapgs_restore_regs_and_return_to_usermode
 
 1:
@@ -608,7 +603,6 @@  END(common_spurious)
 	/* 0(%rsp): old RSP */
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
 
 	LEAVE_IRQ_STACK
 
@@ -619,7 +613,6 @@  END(common_spurious)
 GLOBAL(retint_user)
 	mov	%rsp,%rdi
 	call	prepare_exit_to_usermode
-	TRACE_IRQS_IRETQ
 
 GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 #ifdef CONFIG_DEBUG_ENTRY
@@ -666,6 +659,7 @@  GLOBAL(swapgs_restore_regs_and_return_to
 retint_kernel:
 #ifdef CONFIG_PREEMPTION
 	/* Interrupts are off */
+	TRACE_IRQS_OFF
 	/* Check if we need preemption */
 	btl	$9, EFLAGS(%rsp)		/* were interrupts off? */
 	jnc	1f
@@ -1367,9 +1361,9 @@  ENTRY(error_entry)
 END(error_entry)
 
 ENTRY(error_exit)
-	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
+	UNWIND_HINT_REGS
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
 	jmp	retint_user
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -129,12 +129,6 @@  ENTRY(entry_SYSENTER_compat)
 	jnz	.Lsysenter_fix_flags
 .Lsysenter_flags_fixed:
 
-	/*
-	 * User mode is traced as though IRQs are on, and SYSENTER
-	 * turned them off.
-	 */
-	TRACE_IRQS_OFF
-
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
 	/* XEN PV guests always use IRET path */
@@ -247,12 +241,6 @@  GLOBAL(entry_SYSCALL_compat_after_hwfram
 	pushq   $0			/* pt_regs->r15 = 0 */
 	xorl	%r15d, %r15d		/* nospec   r15 */
 
-	/*
-	 * User mode is traced as though IRQs are on, and SYSENTER
-	 * turned them off.
-	 */
-	TRACE_IRQS_OFF
-
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
 	/* XEN PV guests always use IRET path */
@@ -266,7 +254,6 @@  GLOBAL(entry_SYSCALL_compat_after_hwfram
 	 * stack. So let's erase the thread stack right now.
 	 */
 	STACKLEAK_ERASE
-	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
 	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
 	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
 	movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */
@@ -403,17 +390,9 @@  ENTRY(entry_INT80_compat)
 	xorl	%r15d, %r15d		/* nospec   r15 */
 	cld
 
-	/*
-	 * User mode is traced as though IRQs are on, and the interrupt
-	 * gate turned them off.
-	 */
-	TRACE_IRQS_OFF
-
 	movq	%rsp, %rdi
 	call	do_int80_syscall_32
 .Lsyscall_32_done:
 
-	/* Go back to user mode. */
-	TRACE_IRQS_ON
 	jmp	swapgs_restore_regs_and_return_to_usermode
 END(entry_INT80_compat)