diff mbox series

[22/23] x86/fpu: Don't restore the FPU state directly from userland in __fpu__restore_sig()

Message ID 20181107194858.9380-23-bigeasy@linutronix.de (mailing list archive)
State New, archived
Headers show
Series [01/23] x86/fpu: Use ULL for shift in xfeature_uncompacted_offset() | expand

Commit Message

Sebastian Andrzej Siewior Nov. 7, 2018, 7:48 p.m. UTC
__fpu__restore_sig() restores the CPU's FPU state directly from
userland. If we restore registers on return to userland then we can't
load them directly from userland because a context switch/BH could
destroy them.

Restore the FPU registers after they have been copied from userland.
__fpregs_changes_begin() ensures that they are not modified while beeing
worked on. TIF_NEED_FPU_LOAD is clreared we want to keep our state, not
the saved state.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/x86/include/asm/fpu/internal.h | 34 -----------------------------
 arch/x86/kernel/fpu/signal.c        | 33 ++++++++++++++++++----------
 2 files changed, 22 insertions(+), 45 deletions(-)

Comments

Andy Lutomirski Nov. 8, 2018, 6:25 p.m. UTC | #1
On Wed, Nov 7, 2018 at 11:49 AM Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
>
> __fpu__restore_sig() restores the CPU's FPU state directly from
> userland. If we restore registers on return to userland then we can't
> load them directly from userland because a context switch/BH could
> destroy them.
>
> Restore the FPU registers after they have been copied from userland.
> __fpregs_changes_begin() ensures that they are not modified while beeing
> worked on. TIF_NEED_FPU_LOAD is clreared we want to keep our state, not
> the saved state.

I'm conceptually okay with this change, but what happens if the
registers that are copied into the kernel are garbage?  We used to
fail the restore and presumably kill the task.  What happens now?
Sebastian Andrzej Siewior Nov. 9, 2018, 7:09 p.m. UTC | #2
On 2018-11-08 10:25:24 [-0800], Andy Lutomirski wrote:
> On Wed, Nov 7, 2018 at 11:49 AM Sebastian Andrzej Siewior
> <bigeasy@linutronix.de> wrote:
> >
> > __fpu__restore_sig() restores the CPU's FPU state directly from
> > userland. If we restore registers on return to userland then we can't
> > load them directly from userland because a context switch/BH could
> > destroy them.
> >
> > Restore the FPU registers after they have been copied from userland.
> > __fpregs_changes_begin() ensures that they are not modified while beeing
> > worked on. TIF_NEED_FPU_LOAD is clreared we want to keep our state, not
> > the saved state.
> 
> I'm conceptually okay with this change, but what happens if the
> registers that are copied into the kernel are garbage?  We used to
> fail the restore and presumably kill the task.  What happens now?

What means garbage? Assume you mean something like memset(xstate, 0xff,)
then this would happen:

| ------------[ cut here ]------------
| Bad FPU state detected at __fpu__restore_sig+0x2a3/0x660, reinitializing FPU registers.
| WARNING: CPU: 0 PID: 1687 at arch/x86/mm/extable.c:113 ex_handler_fprestore+0x60/0x70
| Modules linked in:
| CPU: 0 PID: 1687 Comm: ssltc Not tainted 4.20.0-rc1+ #116
| Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.11.1-1 04/01/2014
| RIP: 0010:ex_handler_fprestore+0x60/0x70
| Code: 00 00 00 5d c3 48 0f ae 0d 0d 13 2c 01 b8 01 00 00 00 5d c3 48 89 c6 48 c7 c7 40 39 02 82 c6 05 c3 1d 2b 01 01 e8 0a 01 01 00 <0f> 0b eb b9 66 66 2e 0f 1f 84 00 00 7
| RSP: 0018:ffffc90000563cf8 EFLAGS: 00010282
| RAX: 0000000000000000 RBX: ffffc90000563d68 RCX: 0000000000000000
| RDX: 0000000000000046 RSI: 0000000000000000 RDI: ffff88003a1516c0
| RBP: ffffc90000563cf8 R08: 0000000000000000 R09: 0000000000000000
| R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000000d
| R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
| FS:  00007f17678f1b80(0000) GS:ffff88003e800000(0000) knlGS:0000000000000000
| CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
| CR2: 00007f1767dc2000 CR3: 000000003d0b6003 CR4: 0000000000060ef0
| Call Trace:
|  fixup_exception+0x45/0x5c
|  do_general_protection+0x61/0x1a0
|  general_protection+0x1e/0x30
| RIP: 0010:__fpu__restore_sig+0x2a3/0x660
| Code: 00 00 48 8b 95 48 ff ff ff 48 f7 d2 48 21 d0 0f 85 73 03 00 00 48 8b 85 48 ff ff ff 4c 89 f7 48 89 c2 48 c1 ea 20 48 0f ae 2f <65> 48 8b 04 25 00 4f 01 00 f0 80 60 e
| RSP: 0018:ffffc90000563e18 EFLAGS: 00010246
| RAX: 0000000000000007 RBX: 00007ffe7f19df40 RCX: 00000000000031d7
| RDX: 0000000000000000 RSI: 0000000000000200 RDI: ffff88003a152a40
| RBP: ffffc90000563ed0 R08: 0000000000000000 R09: 0000000000000000
| R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
| R13: ffff88003a1516c0 R14: ffff88003a152a40 R15: ffff88003a152a00
|  ? __fpu__restore_sig+0x261/0x660
|  ? trace_hardirqs_on+0x22/0x110
|  fpu__restore_sig+0x28/0x40
|  __ia32_sys_rt_sigreturn+0x218/0x2aa
|  do_syscall_64+0x50/0x1a0
|  entry_SYSCALL_64_after_hwframe+0x49/0xbe
| RIP: 0033:0x7f1767ca8a7b
| Code: 31 d8 0f a4 ed 05 c5 79 7f 4c 24 30 01 fa 21 c6 c5 b9 72 d4 1f 31 d8 01 ea 0f ac ed 07 31 de c5 a9 73 fc 0c c5 d9 fe e4 89 d7 <03> 4c 24 08 31 c5 0f a4 d2 05 c4 c1 1
| RSP: 002b:00007ffe7f19e340 EFLAGS: 00000282
| RAX: 000000001c9f00ab RBX: 00000000837732a0 RCX: 000000001bff4f26
| RDX: 0000000037535a7c RSI: 00000000979700a8 RDI: 0000000037535a7c
| RBP: 00000000053caff3 R08: 00005615ffd442a0 R09: 00007f1767dc54c0
| R10: 00007f1767dc6000 R11: 00007ffe7f19e3c8 R12: 00007f1767dc2000
| R13: 00005615ff6c30a0 R14: 00007f1767cab080 R15: 00007f1767d95100
| irq event stamp: 12775
| hardirqs last  enabled at (12774): [<ffffffff810de72c>] vprintk_emit+0xac/0x270
| hardirqs last disabled at (12775): [<ffffffff81001c01>] trace_hardirqs_off_thunk+0x1a/0x1c
| softirqs last  enabled at (12756): [<ffffffff81c003a2>] __do_softirq+0x3a2/0x4d1
| softirqs last disabled at (12760): [<ffffffff8102a210>] __fpu__restore_sig+0x230/0x660
| ---[ end trace 163c456a84752e26 ]---

and the task continues. Before we had this:
| BUG: GPF in non-whitelisted uaccess (non-canonical address?)
| general protection fault: 0000 [#2] PREEMPT SMP DEBUG_PAGEALLOC PTI
| CPU: 1 PID: 1687 Comm: ssltc Tainted: G      D           4.20.0-rc1+ #120
| Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.11.1-1 04/01/2014
| RIP: 0010:__fpu__restore_sig+0x2fa/0x710
| Code: 34 8b 95 5c ff ff ff 85 d2 75 2a 4c 89 fa 48 f7 d2 48 21 d0 0f 85 b8 03 00 00 66 66 90 4c 89 fa 48 89 df 44 89 f8 48 c1 ea 20 <48> 0f ae 2f 31 db 66 66 90 eb 24 48 2
| RSP: 0018:ffffc900004abe20 EFLAGS: 00010246
| RAX: 0000000000000007 RBX: 00007ffe5b4c1680 RCX: 0000000000000000
| RDX: 0000000000000000 RSI: ffffffff820544ad RDI: 00007ffe5b4c1680
| RBP: ffffc900004abed0 R08: 0000000000000000 R09: 0000000000000001
| R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800394440c0
| R13: ffff880039442d80 R14: ffff8800394440c0 R15: 0000000000000007
| FS:  00007f81db2d4b80(0000) GS:ffff88003e880000(0000) knlGS:0000000000000000
| CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
| CR2: 00007f81db7a5000 CR3: 000000003aafa002 CR4: 0000000000060ee0
| Call Trace:
|  ? trace_hardirqs_on+0x22/0x110
|  fpu__restore_sig+0x28/0x40
|  __ia32_sys_rt_sigreturn+0x218/0x2aa
|  do_syscall_64+0x50/0x180
|  entry_SYSCALL_64_after_hwframe+0x49/0xbe
| RIP: 0033:0x7f81db68bdc7
| Code: 54 24 14 31 df 89 ee 0f a4 ed 05 c5 b9 72 d1 1e c5 79 7f 0c 24 01 fa 31 de 0f ac c0 07 01 ea c5 f1 72 f1 02 03 4c 24 18 31 c6 <89> d7 0f a4 d2 05 01 f1 31 c7 0f ac 3
| RSP: 002b:00007ffe5b4c1a80 EFLAGS: 00000286
| RAX: 00000000ad356612 RBX: 000000007d63f272 RCX: 00000000adc87c8e
| RDX: 00000000d90909d0 RSI: 000000008b541c65 RDI: 00000000188b44f4
| RBP: 00000000605100ab R08: 0000555c9ecd52a0 R09: 00007f81db7a8f80
| R10: 00007f81db7a9000 R11: 00007ffe5b4c1ae8 R12: 00007f81db7a5000
| R13: 0000555c9d0c60a0 R14: 00007f81db68e080 R15: 00007f81db778100
| Modules linked in:
| Dumping ftrace buffer:
|    (ftrace buffer empty)
| ---[ end trace a16fb09d293317cc ]---
| RIP: 0010:__fpu__restore_sig+0x2fa/0x710
| Code: 34 8b 95 5c ff ff ff 85 d2 75 2a 4c 89 fa 48 f7 d2 48 21 d0 0f 85 b8 03 00 00 66 66 90 4c 89 fa 48 89 df 44 89 f8 48 c1 ea 20 <48> 0f ae 2f 31 db 66 66 90 eb 24 48 2
| RSP: 0018:ffffc900004abe20 EFLAGS: 00010246
| RAX: 0000000000000007 RBX: 00007ffe1945ae40 RCX: 0000000000000000
| RDX: 0000000000000000 RSI: 00000000ffffffff RDI: 00007ffe1945ae40
| RBP: ffffc900004abed0 R08: 0000000000000000 R09: 0000000000000000
| R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800394440c0
| R13: ffff880039442d80 R14: ffff8800394440c0 R15: 0000000000000007
| FS:  00007f81db2d4b80(0000) GS:ffff88003e880000(0000) knlGS:0000000000000000
| CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
| CR2: 00007f81db7a5000 CR3: 000000003aafa002 CR4: 0000000000060ee0

Noisier but the task segfaulted.
I could add validate_xstate_header() + sanitize_restored_xstate() which
is what we do for 32bit-frames and should catch garbage. Then it ends
with:
| ssltc[1724] bad frame in rt_sigreturn frame:00000000ac8c6496 ip:7f4a10e36eda sp:7fff75054540 orax:ffffffffffffffff in libcrypto.so.1.1[7f4a10ce9000+19f000]

which would be also a segfault but with a smaller backtrace. Also
checking for XFEATURE_MASK_SUPERVISOR sounds like a good thing.
Right now XFEATURE_MASK_SUPERVISOR is not enabled in BV so it should not
make a difference but should be save in future.

Sebastian
diff mbox series

Patch

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 9e213a6703c84..5e86ff60a3a5c 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -137,28 +137,11 @@  static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 	}
 }
 
-static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
-{
-	if (IS_ENABLED(CONFIG_X86_32))
-		return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-	else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
-		return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
-	/* See comment in copy_fxregs_to_kernel() below. */
-	return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
-			  "m" (*fx));
-}
-
 static inline void copy_kernel_to_fregs(struct fregs_state *fx)
 {
 	kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
-static inline int copy_user_to_fregs(struct fregs_state __user *fx)
-{
-	return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
 static inline void copy_fxregs_to_kernel(struct fpu *fpu)
 {
 	if (IS_ENABLED(CONFIG_X86_32))
@@ -333,23 +316,6 @@  static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
 	XSTATE_XRESTORE(xstate, lmask, hmask);
 }
 
-/*
- * Restore xstate from user space xsave area.
- */
-static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
-{
-	struct xregs_state *xstate = ((__force struct xregs_state *)buf);
-	u32 lmask = mask;
-	u32 hmask = mask >> 32;
-	int err;
-
-	stac();
-	XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
-	clac();
-
-	return err;
-}
-
 /*
  * These must be called with preempt disabled. Returns
  * 'true' if the FPU state is still intact and we can
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 179e2b19976ad..9720529859483 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -228,23 +228,30 @@  sanitize_restored_xstate(union fpregs_state *state,
 /*
  * Restore the extended state if present. Otherwise, restore the FP/SSE state.
  */
-static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
+static void copy_to_fpregs_zeroing(struct fpu *fpu, u64 xbv, int fx_only)
 {
+	__fpregs_changes_begin();
 	if (use_xsave()) {
-		if ((unsigned long)buf % 64 || fx_only) {
+		if (fx_only) {
 			u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
+
 			copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
-			return copy_user_to_fxregs(buf);
+			copy_kernel_to_fxregs(&fpu->state.fxsave);
 		} else {
 			u64 init_bv = xfeatures_mask & ~xbv;
+
 			if (unlikely(init_bv))
 				copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
-			return copy_user_to_xregs(buf, xbv);
+			copy_kernel_to_xregs(&fpu->state.xsave, xbv);
 		}
 	} else if (use_fxsr()) {
-		return copy_user_to_fxregs(buf);
-	} else
-		return copy_user_to_fregs(buf);
+		copy_kernel_to_fxregs(&fpu->state.fxsave);
+	} else {
+		copy_kernel_to_fregs(&fpu->state.fsave);
+	}
+	clear_thread_flag(TIF_NEED_FPU_LOAD);
+	fpregs_activate(fpu);
+	__fpregs_changes_end();
 }
 
 static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
@@ -255,6 +262,7 @@  static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	int state_size = fpu_kernel_xstate_size;
 	u64 xfeatures = 0;
 	int fx_only = 0;
+	int err = 0;
 
 	ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
 			 IS_ENABLED(CONFIG_IA32_EMULATION));
@@ -298,7 +306,6 @@  static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		union fpregs_state *state;
 		void *tmp;
 		struct user_i387_ia32_struct env;
-		int err = 0;
 
 		tmp = kmalloc(sizeof(*state) + fpu_kernel_xstate_size + 64, GFP_KERNEL);
 		if (!tmp)
@@ -327,12 +334,16 @@  static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	} else {
 		/*
 		 * For 64-bit frames and 32-bit fsave frames, restore the user
-		 * state to the registers directly (with exceptions handled).
+		 * state from a copy in thread's fpu state.
 		 */
-		if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) {
+		err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+		if (err) {
 			fpu__clear(fpu);
-			return -1;
+			return -EFAULT;
 		}
+		if ((unsigned long)buf_fx % 64)
+			fx_only = 1;
+		copy_to_fpregs_zeroing(fpu, xfeatures, fx_only);
 	}
 
 	return 0;