Message ID | Zi9Ts1HcqiKzy9GX@gmail.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Series | x86/mm: Remove broken vsyscall emulation code from the page fault code | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
On Mon, Apr 29, 2024 at 10:00:51AM +0200, Ingo Molnar wrote: SNIP > The attached patch looks like the ObviouslyCorrect(tm) thing to do. > > NOTE! This broken code goes back to this commit in 2011: > > 4fc3490114bb ("x86-64: Set siginfo and context on vsyscall emulation faults") > > ... and back then the reason was to get all the siginfo details right. > Honestly, I do not for a moment believe that it's worth getting the siginfo > details right here, but part of the commit says: > > This fixes issues with UML when vsyscall=emulate. > > ... and so my patch to remove this garbage will probably break UML in this > situation. > > I do not believe that anybody should be running with vsyscall=emulate in > 2024 in the first place, much less if you are doing things like UML. But > let's see if somebody screams. > > Not-Yet-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> > Signed-off-by: Ingo Molnar <mingo@kernel.org> > Link: https://lore.kernel.org/r/CAHk-=wh9D6f7HUkDgZHKmDCHUQmp+Co89GP+b8+z+G56BKeyNg@mail.gmail.com fwiw I can no longer trigger the invalid wait context bug with this change Tested-by: Jiri Olsa <jolsa@kernel.org> jirka > --- > arch/x86/entry/vsyscall/vsyscall_64.c | 25 ++----------------------- > arch/x86/include/asm/processor.h | 1 - > arch/x86/mm/fault.c | 33 +-------------------------------- > 3 files changed, 3 insertions(+), 56 deletions(-) > > diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c > index a3c0df11d0e6..3b0f61b2ea6d 100644 > --- a/arch/x86/entry/vsyscall/vsyscall_64.c > +++ b/arch/x86/entry/vsyscall/vsyscall_64.c > @@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr) > > static bool write_ok_or_segv(unsigned long ptr, size_t size) > { > - /* > - * XXX: if access_ok, get_user, and put_user handled > - * sig_on_uaccess_err, this could go away. 
> - */ > - if (!access_ok((void __user *)ptr, size)) { > struct thread_struct *thread = &current->thread; > > @@ -123,7 +118,6 @@ bool emulate_vsyscall(unsigned long error_code, > struct task_struct *tsk; > unsigned long caller; > int vsyscall_nr, syscall_nr, tmp; > - int prev_sig_on_uaccess_err; > long ret; > unsigned long orig_dx; > > @@ -234,12 +228,8 @@ bool emulate_vsyscall(unsigned long error_code, > goto do_ret; /* skip requested */ > > /* > - * With a real vsyscall, page faults cause SIGSEGV. We want to > - * preserve that behavior to make writing exploits harder. > + * With a real vsyscall, page faults cause SIGSEGV. > */ > - prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; > - current->thread.sig_on_uaccess_err = 1; > - > ret = -EFAULT; > switch (vsyscall_nr) { > case 0: > @@ -262,23 +252,12 @@ bool emulate_vsyscall(unsigned long error_code, > break; > } > > - current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; > - > check_fault: > if (ret == -EFAULT) { > /* Bad news -- userspace fed a bad pointer to a vsyscall. */ > warn_bad_vsyscall(KERN_INFO, regs, > "vsyscall fault (exploit attempt?)"); > - > - /* > - * If we failed to generate a signal for any reason, > - * generate one here. (This should be impossible.) > - */ > - if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && > - !sigismember(&tsk->pending.signal, SIGSEGV))) > - goto sigsegv; > - > - return true; /* Don't emulate the ret. */ > + goto sigsegv; > } > > regs->ax = ret; > diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h > index 811548f131f4..78e51b0d6433 100644 > --- a/arch/x86/include/asm/processor.h > +++ b/arch/x86/include/asm/processor.h > @@ -472,7 +472,6 @@ struct thread_struct { > unsigned long iopl_emul; > > unsigned int iopl_warn:1; > - unsigned int sig_on_uaccess_err:1; > > /* > * Protection Keys Register for Userspace. 
Loaded immediately on > diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c > index 6b2ca8ba75b8..f26ecabc9424 100644 > --- a/arch/x86/mm/fault.c > +++ b/arch/x86/mm/fault.c > @@ -724,39 +724,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, > WARN_ON_ONCE(user_mode(regs)); > > /* Are we prepared to handle this kernel fault? */ > - if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { > - /* > - * Any interrupt that takes a fault gets the fixup. This makes > - * the below recursive fault logic only apply to a faults from > - * task context. > - */ > - if (in_interrupt()) > - return; > - > - /* > - * Per the above we're !in_interrupt(), aka. task context. > - * > - * In this case we need to make sure we're not recursively > - * faulting through the emulate_vsyscall() logic. > - */ > - if (current->thread.sig_on_uaccess_err && signal) { > - sanitize_error_code(address, &error_code); > - > - set_signal_archinfo(address, error_code); > - > - if (si_code == SEGV_PKUERR) { > - force_sig_pkuerr((void __user *)address, pkey); > - } else { > - /* XXX: hwpoison faults will set the wrong code. */ > - force_sig_fault(signal, si_code, (void __user *)address); > - } > - } > - > - /* > - * Barring that, we can do the fixup and be happy. > - */ > + if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) > return; > - } > > /* > * AMD erratum #91 manifests as a spurious page fault on a PREFETCH >
On Mon, 29 Apr 2024 at 01:00, Ingo Molnar <mingo@kernel.org> wrote: > > I did some Simple Testing™, and nothing seemed to break in any way visible > to me, and the diffstat is lovely: > > 3 files changed, 3 insertions(+), 56 deletions(-) > > Might stick this into tip:x86/mm and see what happens? Well, Hilf had it go through the syzbot testing, and Jiri seems to have tested it on his setup too, so it looks like it's all good, and you can change the "Not-Yet-Signed-off-by" to be a proper sign-off from me. It would be good to have some UML testing done, but at the same time I do think that anybody running UML on modern kernels should be running a modern user-mode setup too, so while the exact SIGSEGV details may have been an issue in 2011, I don't think it's reasonable to think that it's an issue in 2024. Linus
On Mon, 29 Apr 2024 at 08:51, Linus Torvalds <torvalds@linux-foundation.org> wrote: > > Well, Hilf had it go through the syzbot testing, and Jiri seems to > have tested it on his setup too, so it looks like it's all good, and > you can change the "Not-Yet-Signed-off-by" to be a proper sign-off > from me. Side note: having looked more at this, I suspect we have room for further cleanups in this area. In particular, I think the page fault emulation code should be moved from do_user_addr_fault() to do_kern_addr_fault(), and the horrible hack that is fault_in_kernel_space() should be removed (it is what now makes a vsyscall page fault be treated as a user address, and the only _reason_ for that is that we do the vsyscall handling in the wrong place). I also think that the vsyscall emulation code should just be cleaned up - instead of looking up the system call number and then calling the __x64_xyz() system call stub, I think we should just write out the code in-place. That would get the SIGSEGV cases right too, and I think it would actually clean up the code. We already do almost everything but the (trivial) low-level ops anyway. But I think my patch to remove the 'sig_on_uaccess_err' should just go in first, since it fixes a real and present issue. And then if somebody has the energy - or if it turns out that we actually need to get the SIGSEGV siginfo details right - we can do the other cleanups. They are mostly unrelated, but the current sig_on_uaccess_err code just makes everything more complicated and needs to go. Linus
On Mon, 29 Apr 2024 at 11:47, Linus Torvalds <torvalds@linux-foundation.org> wrote: > > In particular, I think the page fault emulation code should be moved > from do_user_addr_fault() to do_kern_addr_fault(), and the horrible > hack that is fault_in_kernel_space() should be removed (it is what now > makes a vsyscall page fault be treated as a user address, and the only > _reason_ for that is that we do the vsyscall handling in the wrong > place). Final note: we should also remove the XONLY option entirely, and remove all the strange page table handling we currently do for it. It won't work anyway on future CPUs with LASS, and we *have* to emulate things (and not in the page fault path, I think LASS will cause a GP fault). I think the LASS patches ended up just disabling LASS if people wanted vsyscall, which is probably the worst case. Again, this is more of a "I think we have more work to do", and should all happen after that sig_on_uaccess_err stuff is gone. I guess that patch to rip out sig_on_uaccess_err needs to go into 6.9 and even be marked for stable, since it most definitely breaks some stuff currently. Even if that "some stuff" is pretty esoteric (ie "vsyscall=emulate" together with tracing). Linus
On Mon, Apr 29, 2024 at 12:07 PM Linus Torvalds <torvalds@linux-foundation.org> wrote: > > On Mon, 29 Apr 2024 at 11:47, Linus Torvalds > <torvalds@linux-foundation.org> wrote: > > > > In particular, I think the page fault emulation code should be moved > > from do_user_addr_fault() to do_kern_addr_fault(), and the horrible > > hack that is fault_in_kernel_space() should be removed (it is what now > > makes a vsyscall page fault be treated as a user address, and the only > > _reason_ for that is that we do the vsyscall handling in the wrong > > place). > > Final note: we should also remove the XONLY option entirely, and > remove all the strange page table handling we currently do for it. > > It won't work anyway on future CPUs with LASS, and we *have* to > emulate things (and not in the page fault path, I think LASS will > cause a GP fault). What strange page table handling do we do for XONLY? EMULATE actually involves page tables. XONLY is just in the "gate area" (which is more or less just a procfs thing) and the page fault code. So I think we should remove EMULATE before removing XONLY. We already tried pretty hard to get everyone to stop using EMULATE.
On Mon, Apr 29, 2024 at 6:51 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > On Mon, Apr 29, 2024 at 10:00:51AM +0200, Ingo Molnar wrote: > > SNIP > > > The attached patch looks like the ObviouslyCorrect(tm) thing to do. > > > > NOTE! This broken code goes back to this commit in 2011: > > > > 4fc3490114bb ("x86-64: Set siginfo and context on vsyscall emulation faults") > > > > ... and back then the reason was to get all the siginfo details right. > > Honestly, I do not for a moment believe that it's worth getting the siginfo > > details right here, but part of the commit says: > > > > This fixes issues with UML when vsyscall=emulate. > > > > ... and so my patch to remove this garbage will probably break UML in this > > situation. > > > > I do not believe that anybody should be running with vsyscall=emulate in > > 2024 in the first place, much less if you are doing things like UML. But > > let's see if somebody screams. > > > > Not-Yet-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> > > Signed-off-by: Ingo Molnar <mingo@kernel.org> > > Link: https://lore.kernel.org/r/CAHk-=wh9D6f7HUkDgZHKmDCHUQmp+Co89GP+b8+z+G56BKeyNg@mail.gmail.com > > fwiw I can no longer trigger the invalid wait context bug > with this change > > Tested-by: Jiri Olsa <jolsa@kernel.org> Acked-by: Andy Lutomirski <luto@kernel.org>
On Mon, 29 Apr 2024 at 16:30, Andy Lutomirski <luto@amacapital.net> wrote: > > What strange page table handling do we do for XONLY? Ahh, I misread set_vsyscall_pgtable_user_bits(). It's used for EMULATE not for XONLY. And the code in pti_setup_vsyscall() is just wrong, and does it for all cases. > So I think we should remove EMULATE before removing XONLY. Ok, looking at that again, I don't disagree. I misread that XONLY as mapping it executable, but it is actually just mapping it readable. Yes, let's remove EMULATE, and keep XONLY. Linus
* Linus Torvalds <torvalds@linux-foundation.org> wrote: > I guess that patch to rip out sig_on_uaccess_err needs to go into 6.9 and > even be marked for stable, since it most definitely breaks some stuff > currently. Even if that "some stuff" is pretty esoteric (ie > "vsyscall=emulate" together with tracing). Yeah - I just put it into tip:x86/urgent as-is, with the various Tested-by and Acked-by tags added, and we'll send it to you later this week if all goes well. Thanks, Ingo
Hi Ingo,
kernel test robot noticed the following build warnings:
[auto build test WARNING on linux/master]
[also build test WARNING on tip/x86/mm linus/master v6.9-rc6 next-20240430]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ingo-Molnar/x86-mm-Remove-broken-vsyscall-emulation-code-from-the-page-fault-code/20240430-135258
base: linux/master
patch link: https://lore.kernel.org/r/Zi9Ts1HcqiKzy9GX%40gmail.com
patch subject: [PATCH] x86/mm: Remove broken vsyscall emulation code from the page fault code
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20240430/202404302220.EkdfEBSB-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240430/202404302220.EkdfEBSB-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202404302220.EkdfEBSB-lkp@intel.com/
All warnings (new ones prefixed by >>):
arch/x86/entry/vsyscall/vsyscall_64.c: In function 'emulate_vsyscall':
>> arch/x86/entry/vsyscall/vsyscall_64.c:118:29: warning: variable 'tsk' set but not used [-Wunused-but-set-variable]
118 | struct task_struct *tsk;
| ^~~
vim +/tsk +118 arch/x86/entry/vsyscall/vsyscall_64.c
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 114
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 115 bool emulate_vsyscall(unsigned long error_code,
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 116 struct pt_regs *regs, unsigned long address)
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 117 {
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 @118 struct task_struct *tsk;
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 119 unsigned long caller;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 120 int vsyscall_nr, syscall_nr, tmp;
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 121 long ret;
fa697140f9a201 arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-05 122 unsigned long orig_dx;
7460ed2844ffad arch/x86_64/kernel/vsyscall.c John Stultz 2007-02-16 123
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 124 /* Write faults or kernel-privilege faults never get fixed up. */
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 125 if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 126 return false;
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 127
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 128 if (!(error_code & X86_PF_INSTR)) {
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 129 /* Failed vsyscall read */
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 130 if (vsyscall_mode == EMULATE)
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 131 return false;
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 132
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 133 /*
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 134 * User code tried and failed to read the vsyscall page.
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 135 */
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 136 warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 137 return false;
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 138 }
918ce325098a4e arch/x86/entry/vsyscall/vsyscall_64.c Andy Lutomirski 2019-06-26 139
c9712944b2a123 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-07-13 140 /*
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 141 * No point in checking CS -- the only way to get here is a user mode
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 142 * trap to a high address, which means that we're in 64-bit user code.
c9712944b2a123 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-07-13 143 */
7460ed2844ffad arch/x86_64/kernel/vsyscall.c John Stultz 2007-02-16 144
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 145 WARN_ON_ONCE(address != regs->ip);
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 146
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 147 if (vsyscall_mode == NONE) {
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 148 warn_bad_vsyscall(KERN_INFO, regs,
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 149 "vsyscall attempted with vsyscall=none");
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 150 return false;
c9712944b2a123 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-07-13 151 }
7460ed2844ffad arch/x86_64/kernel/vsyscall.c John Stultz 2007-02-16 152
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 153 vsyscall_nr = addr_to_vsyscall_nr(address);
c149a665ac488e arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-03 154
c149a665ac488e arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-03 155 trace_emulate_vsyscall(vsyscall_nr);
c149a665ac488e arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-03 156
c9712944b2a123 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-07-13 157 if (vsyscall_nr < 0) {
c9712944b2a123 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-07-13 158 warn_bad_vsyscall(KERN_WARNING, regs,
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 159 "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 160 goto sigsegv;
7460ed2844ffad arch/x86_64/kernel/vsyscall.c John Stultz 2007-02-16 161 }
7460ed2844ffad arch/x86_64/kernel/vsyscall.c John Stultz 2007-02-16 162
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 163 if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 164 warn_bad_vsyscall(KERN_WARNING, regs,
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 165 "vsyscall with bad stack (exploit attempt?)");
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 166 goto sigsegv;
^1da177e4c3f41 arch/x86_64/kernel/vsyscall.c Linus Torvalds 2005-04-16 167 }
^1da177e4c3f41 arch/x86_64/kernel/vsyscall.c Linus Torvalds 2005-04-16 168
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 169 tsk = current;
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 170
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 171 /*
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 172 * Check for access_ok violations and find the syscall nr.
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 173 *
46ed99d1b7c929 arch/x86/kernel/vsyscall_64.c Emil Goode 2012-04-01 174 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 175 * 64-bit, so we don't need to special-case it here. For all the
46ed99d1b7c929 arch/x86/kernel/vsyscall_64.c Emil Goode 2012-04-01 176 * vsyscalls, NULL means "don't write anything" not "write it at
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 177 * address 0".
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 178 */
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 179 switch (vsyscall_nr) {
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 180 case 0:
ddccf40fe82b7a arch/x86/entry/vsyscall/vsyscall_64.c Arnd Bergmann 2017-11-23 181 if (!write_ok_or_segv(regs->di, sizeof(struct __kernel_old_timeval)) ||
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 182 !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 183 ret = -EFAULT;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 184 goto check_fault;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 185 }
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 186
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 187 syscall_nr = __NR_gettimeofday;
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 188 break;
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 189
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 190 case 1:
21346564ccad17 arch/x86/entry/vsyscall/vsyscall_64.c Arnd Bergmann 2019-11-05 191 if (!write_ok_or_segv(regs->di, sizeof(__kernel_old_time_t))) {
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 192 ret = -EFAULT;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 193 goto check_fault;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 194 }
5651721edec25b arch/x86/kernel/vsyscall_64.c Will Drewry 2012-07-13 195
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 196 syscall_nr = __NR_time;
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 197 break;
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 198
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 199 case 2:
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 200 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 201 !write_ok_or_segv(regs->si, sizeof(unsigned))) {
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 202 ret = -EFAULT;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 203 goto check_fault;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 204 }
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 205
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 206 syscall_nr = __NR_getcpu;
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 207 break;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 208 }
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 209
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 210 /*
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 211 * Handle seccomp. regs->ip must be the original value.
5fb94e9ca333f0 arch/x86/entry/vsyscall/vsyscall_64.c Mauro Carvalho Chehab 2018-05-08 212 * See seccomp_send_sigsys and Documentation/userspace-api/seccomp_filter.rst.
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 213 *
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 214 * We could optimize the seccomp disabled case, but performance
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 215 * here doesn't matter.
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 216 */
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 217 regs->orig_ax = syscall_nr;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 218 regs->ax = -ENOSYS;
fefad9ef58ffc2 arch/x86/entry/vsyscall/vsyscall_64.c Christian Brauner 2019-09-24 219 tmp = secure_computing();
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 220 if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 221 warn_bad_vsyscall(KERN_DEBUG, regs,
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 222 "seccomp tried to change syscall nr or ip");
fcb116bc43c8c3 arch/x86/entry/vsyscall/vsyscall_64.c Eric W. Biederman 2021-11-18 223 force_exit_sig(SIGSYS);
695dd0d634df89 arch/x86/entry/vsyscall/vsyscall_64.c Eric W. Biederman 2021-10-20 224 return true;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 225 }
26893107aa717c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2014-11-04 226 regs->orig_ax = -1;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 227 if (tmp)
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 228 goto do_ret; /* skip requested */
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 229
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 230 /*
198d72414c92c7 arch/x86/entry/vsyscall/vsyscall_64.c Ingo Molnar 2024-04-29 231 * With a real vsyscall, page faults cause SIGSEGV.
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 232 */
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 233 ret = -EFAULT;
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 234 switch (vsyscall_nr) {
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 235 case 0:
fa697140f9a201 arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-05 236 /* this decodes regs->di and regs->si on its own */
d5a00528b58cdb arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-09 237 ret = __x64_sys_gettimeofday(regs);
5651721edec25b arch/x86/kernel/vsyscall_64.c Will Drewry 2012-07-13 238 break;
5651721edec25b arch/x86/kernel/vsyscall_64.c Will Drewry 2012-07-13 239
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 240 case 1:
fa697140f9a201 arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-05 241 /* this decodes regs->di on its own */
d5a00528b58cdb arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-09 242 ret = __x64_sys_time(regs);
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 243 break;
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 244
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 245 case 2:
fa697140f9a201 arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-05 246 /* while we could clobber regs->dx, we didn't in the past... */
fa697140f9a201 arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-05 247 orig_dx = regs->dx;
fa697140f9a201 arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-05 248 regs->dx = 0;
fa697140f9a201 arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-05 249 /* this decodes regs->di, regs->si and regs->dx on its own */
d5a00528b58cdb arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-09 250 ret = __x64_sys_getcpu(regs);
fa697140f9a201 arch/x86/entry/vsyscall/vsyscall_64.c Dominik Brodowski 2018-04-05 251 regs->dx = orig_dx;
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 252 break;
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 253 }
8c73626ab28527 arch/x86/kernel/vsyscall_64.c John Stultz 2010-07-13 254
87b526d349b04c arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2012-10-01 255 check_fault:
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 256 if (ret == -EFAULT) {
4fc3490114bb15 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-11-07 257 /* Bad news -- userspace fed a bad pointer to a vsyscall. */
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 258 warn_bad_vsyscall(KERN_INFO, regs,
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 259 "vsyscall fault (exploit attempt?)");
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 260 goto sigsegv;
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 261 }
8c73626ab28527 arch/x86/kernel/vsyscall_64.c John Stultz 2010-07-13 262
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 263 regs->ax = ret;
8c73626ab28527 arch/x86/kernel/vsyscall_64.c John Stultz 2010-07-13 264
5651721edec25b arch/x86/kernel/vsyscall_64.c Will Drewry 2012-07-13 265 do_ret:
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 266 /* Emulate a ret instruction. */
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 267 regs->ip = caller;
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 268 regs->sp += 8;
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 269 return true;
c08c820508233b arch/x86_64/kernel/vsyscall.c Vojtech Pavlik 2006-09-26 270
5cec93c216db77 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-06-05 271 sigsegv:
3cf5d076fb4d48 arch/x86/entry/vsyscall/vsyscall_64.c Eric W. Biederman 2019-05-23 272 force_sig(SIGSEGV);
3ae36655b97a03 arch/x86/kernel/vsyscall_64.c Andy Lutomirski 2011-08-10 273 return true;
^1da177e4c3f41 arch/x86_64/kernel/vsyscall.c Linus Torvalds 2005-04-16 274 }
^1da177e4c3f41 arch/x86_64/kernel/vsyscall.c Linus Torvalds 2005-04-16 275
* Ingo Molnar <mingo@kernel.org> wrote: > > * Linus Torvalds <torvalds@linux-foundation.org> wrote: > > > I guess that patch to rip out sig_on_uaccess_err needs to go into 6.9 and > > even be marked for stable, since it most definitely breaks some stuff > > currently. Even if that "some stuff" is pretty esoteric (ie > > "vsyscall=emulate" together with tracing). > > Yeah - I just put it into tip:x86/urgent as-is, with the various Tested-by > and Acked-by tags added, and we'll send it to you later this week if all > goes well. Update: added the delta patch below to the fix, because now 'tsk' is unused in emulate_vsyscall(). Thanks, Ingo arch/x86/entry/vsyscall/vsyscall_64.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 3b0f61b2ea6d..2fb7d53cf333 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -115,7 +115,6 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) bool emulate_vsyscall(unsigned long error_code, struct pt_regs *regs, unsigned long address) { - struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; long ret; @@ -166,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code, goto sigsegv; } - tsk = current; - /* * Check for access_ok violations and find the syscall nr. *
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index a3c0df11d0e6..3b0f61b2ea6d 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr) static bool write_ok_or_segv(unsigned long ptr, size_t size) { - /* - * XXX: if access_ok, get_user, and put_user handled - * sig_on_uaccess_err, this could go away. - */ - if (!access_ok((void __user *)ptr, size)) { struct thread_struct *thread = &current->thread; @@ -123,7 +118,6 @@ bool emulate_vsyscall(unsigned long error_code, struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; - int prev_sig_on_uaccess_err; long ret; unsigned long orig_dx; @@ -234,12 +228,8 @@ bool emulate_vsyscall(unsigned long error_code, goto do_ret; /* skip requested */ /* - * With a real vsyscall, page faults cause SIGSEGV. We want to - * preserve that behavior to make writing exploits harder. + * With a real vsyscall, page faults cause SIGSEGV. */ - prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; - current->thread.sig_on_uaccess_err = 1; - ret = -EFAULT; switch (vsyscall_nr) { case 0: @@ -262,23 +252,12 @@ bool emulate_vsyscall(unsigned long error_code, break; } - current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; - check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); - - /* - * If we failed to generate a signal for any reason, - * generate one here. (This should be impossible.) - */ - if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && - !sigismember(&tsk->pending.signal, SIGSEGV))) - goto sigsegv; - - return true; /* Don't emulate the ret. 
*/ + goto sigsegv; } regs->ax = ret; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 811548f131f4..78e51b0d6433 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -472,7 +472,6 @@ struct thread_struct { unsigned long iopl_emul; unsigned int iopl_warn:1; - unsigned int sig_on_uaccess_err:1; /* * Protection Keys Register for Userspace. Loaded immediately on diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6b2ca8ba75b8..f26ecabc9424 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -724,39 +724,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, WARN_ON_ONCE(user_mode(regs)); /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { - /* - * Any interrupt that takes a fault gets the fixup. This makes - * the below recursive fault logic only apply to a faults from - * task context. - */ - if (in_interrupt()) - return; - - /* - * Per the above we're !in_interrupt(), aka. task context. - * - * In this case we need to make sure we're not recursively - * faulting through the emulate_vsyscall() logic. - */ - if (current->thread.sig_on_uaccess_err && signal) { - sanitize_error_code(address, &error_code); - - set_signal_archinfo(address, error_code); - - if (si_code == SEGV_PKUERR) { - force_sig_pkuerr((void __user *)address, pkey); - } else { - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); - } - } - - /* - * Barring that, we can do the fixup and be happy. - */ + if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) return; - } /* * AMD erratum #91 manifests as a spurious page fault on a PREFETCH