@@ -67,9 +67,20 @@
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
-# define resume_kernel restore_all_kernel
#endif
+.macro RETINT_PREEMPT
+#ifdef CONFIG_PREEMPT
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ cmpl $0, PER_CPU_VAR(__preempt_count)
+ jnz .Lend_\@
+ testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ jz .Lend_\@
+ call preempt_schedule_irq
+.Lend_\@:
+#endif
+.endm
+
.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off?
@@ -753,7 +764,7 @@ ret_from_intr:
andl $SEGMENT_RPL_MASK, %eax
#endif
cmpl $USER_RPL, %eax
- jb resume_kernel # not returning to v8086 or userspace
+ jb restore_all_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -763,19 +774,6 @@ ENTRY(resume_userspace)
jmp restore_all
END(ret_from_exception)
-#ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
- DISABLE_INTERRUPTS(CLBR_ANY)
-.Lneed_resched:
- cmpl $0, PER_CPU_VAR(__preempt_count)
- jnz restore_all_kernel
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all_kernel
- call preempt_schedule_irq
- jmp .Lneed_resched
-END(resume_kernel)
-#endif
-
GLOBAL(__begin_SYSENTER_singlestep_region)
/*
* All code from here through __end_SYSENTER_singlestep_region is subject
@@ -1026,6 +1024,7 @@ restore_all:
INTERRUPT_RETURN
restore_all_kernel:
+ RETINT_PREEMPT
TRACE_IRQS_IRET
PARANOID_EXIT_TO_KERNEL_MODE
BUG_IF_WRONG_CR3
@@ -1476,6 +1475,94 @@ END(nmi)
ENTRY(int3)
ASM_CLAC
+
+#ifdef CONFIG_VM86
+ testl $X86_EFLAGS_VM, 8(%esp)
+ jnz .Lfrom_usermode_no_gap
+#endif
+ testl $SEGMENT_RPL_MASK, 4(%esp)
+ jnz .Lfrom_usermode_no_gap
+
+ /*
+ * Here from kernel mode; so the (exception) stack looks like:
+ *
+ * 12(esp) - <previous context>
+ * 8(esp) - flags
+ * 4(esp) - cs
+ * 0(esp) - ip
+ *
+ * Lets build a 5 entry IRET frame after that, such that struct pt_regs
+ * is complete and in particular regs->sp is correct. This gives us
+ * the original 3 enties as gap:
+ *
+ * 32(esp) - <previous context>
+ * 28(esp) - orig_flags / gap
+ * 24(esp) - orig_cs / gap
+ * 20(esp) - orig_ip / gap
+ * 16(esp) - ss
+ * 12(esp) - sp
+ * 8(esp) - flags
+ * 4(esp) - cs
+ * 0(esp) - ip
+ */
+ pushl %ss # ss
+ pushl %esp # sp (points at ss)
+ pushl 4*4(%esp) # flags
+ pushl 4*4(%esp) # cs
+ pushl 4*4(%esp) # ip
+
+ add $16, 12(%esp) # point sp back at the previous context
+
+ pushl $-1 # orig_eax; mark as interrupt
+
+ SAVE_ALL
+ ENCODE_FRAME_POINTER
+ TRACE_IRQS_OFF
+ xorl %edx, %edx # zero error code
+ movl %esp, %eax # pt_regs pointer
+ call do_int3
+
+ RETINT_PREEMPT
+ TRACE_IRQS_IRET
+ /*
+ * If we really never INT3 from entry code, it looks like
+ * we can skip this one.
+ PARANOID_EXIT_TO_KERNEL_MODE
+ */
+ BUG_IF_WRONG_CR3
+ RESTORE_REGS 4 # consume orig_eax
+
+ /*
+ * Reconstruct the 3 entry IRET frame right after the (modified)
+ * regs->sp without lowering %esp in between, such that an NMI in the
+ * middle doesn't scribble our stack.
+ */
+
+ pushl %eax
+ pushl %ecx
+ movl 5*4(%esp), %eax # (modified) regs->sp
+
+ movl 4*4(%esp), %ecx # flags
+ movl %ecx, -4(%eax)
+
+ movl 3*4(%esp), %ecx # cs
+ andl $0x0000ffff, %ecx
+ movl %ecx, -8(%eax)
+
+ movl 2*4(%esp), %ecx # ip
+ movl %ecx, -12(%eax)
+
+ movl 1*4(%esp), %ecx # eax
+ movl %ecx, -16(%eax)
+
+ popl %ecx
+ lea -16(%eax), %esp
+ popl %eax
+
+ jmp .Lirq_return
+
+.Lfrom_usermode_no_gap:
+
pushl $-1 # mark this as an int
SAVE_ALL switch_stacks=1
@@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -899,6 +899,16 @@ ENTRY(\sym)
jnz .Lfrom_usermode_switch_stack_\@
.endif
+ .if \create_gap == 1
+ testb $3, CS-ORIG_RAX(%rsp)
+ jnz .Lfrom_usermode_no_gap_\@
+ .rept 6
+ pushq 5*8(%rsp)
+ .endr
+ UNWIND_HINT_IRET_REGS offset=8
+.Lfrom_usermode_no_gap_\@:
+ .endif
+
.if \paranoid
call paranoid_entry
.else
@@ -1130,7 +1140,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
#endif /* CONFIG_HYPERV */
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3 do_int3 has_error_code=0
+idtentry int3 do_int3 has_error_code=0 create_gap=1
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN_PV
@@ -39,4 +39,24 @@ extern int poke_int3_handler(struct pt_regs *regs);
extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
extern int after_bootmem;
+static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
+{
+ regs->sp -= sizeof(unsigned long);
+ *(unsigned long *)regs->sp = val;
+}
+
+static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
+{
+ regs->ip = ip;
+}
+
+#define INT3_INSN_SIZE 1
+#define CALL_INSN_SIZE 5
+
+static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
+{
+ int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
+ int3_emulate_jmp(regs, func);
+}
+
#endif /* _ASM_X86_TEXT_PATCHING_H */
@@ -171,8 +171,12 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
unsigned long sp = (unsigned long)®s->sp;
u32 *prev_esp;
- if (context == (sp & ~(THREAD_SIZE - 1)))
+ if (context == (sp & ~(THREAD_SIZE - 1))) {
+ /* int3 code adds a gap */
+ if (sp == regs->sp - 5*4)
+ return regs->sp;
return sp;
+ }
prev_esp = (u32 *)(context);
if (*prev_esp)