
arm: fix page faults in do_alignment

Message ID 1567171877-101949-1-git-send-email-jingxiangfeng@huawei.com (mailing list archive)
State New, archived
Series arm: fix page faults in do_alignment

Commit Message

Jing Xiangfeng Aug. 30, 2019, 1:31 p.m. UTC
The function do_alignment can handle misaligned address for user and
kernel space. If it is a userspace access, do_alignment may fail on
a low-memory situation, because page faults are disabled in
probe_kernel_address.

Fix this by using __copy_from_user instead of probe_kernel_address.

Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
---
 arch/arm/mm/alignment.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
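
For background, probe_kernel_address() in kernels of this era wraps the copy in
pagefault_disable()/pagefault_enable(), so a fault taken during the copy returns
an error immediately instead of paging the data in. A simplified sketch of that
helper, assuming the implementation of the era (mm/maccess.c before the v5.8
rework; error handling abbreviated):

static long probe_kernel_read_sketch(void *dst, const void *src, size_t size)
{
	long ret;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	pagefault_disable();
	/* Any fault taken here returns nonzero instead of sleeping to page in. */
	ret = __copy_from_user_inatomic(dst,
			(__force const void __user *)src, size);
	pagefault_enable();
	set_fs(old_fs);

	return ret ? -EFAULT : 0;
}

This is why a valid userspace instruction address can still fail here: if the
text page has been reclaimed under memory pressure, the disabled fault handler
cannot bring it back.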

Comments

Russell King (Oracle) Aug. 30, 2019, 1:35 p.m. UTC | #1
On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
> The function do_alignment can handle misaligned address for user and
> kernel space. If it is a userspace access, do_alignment may fail on
> a low-memory situation, because page faults are disabled in
> probe_kernel_address.
> 
> Fix this by using __copy_from_user instead of probe_kernel_address.
> 
> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>

NAK.

The "scheduling while atomic warning in alignment handling code" is
caused by fixing up the page fault while trying to handle the
mis-alignment fault generated from an instruction in atomic context.

Your patch re-introduces that bug.
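
For context, the bug that b255188 fixed came from reading the instruction with
a sleeping user accessor; a minimal illustration of that failure mode (the
helper below is illustrative, not the original code):

static int read_instr_sleeping(struct pt_regs *regs, u32 *instr)
{
	/*
	 * __get_user() may take a page fault.  If the text page has to be
	 * read back from disk, the fault handler sleeps; when the alignment
	 * fault was raised from atomic context (e.g. a misaligned access
	 * performed under a spinlock), might_sleep() in the fault path
	 * fires "BUG: scheduling while atomic".
	 */
	return __get_user(*instr, (u32 __user *)instruction_pointer(regs));
}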

> ---
>  arch/arm/mm/alignment.c | 16 +++++++++++++---
>  1 file changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
> index 04b3643..2ccabd3 100644
> --- a/arch/arm/mm/alignment.c
> +++ b/arch/arm/mm/alignment.c
> @@ -774,6 +774,7 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
>  	unsigned long instr = 0, instrptr;
>  	int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
>  	unsigned int type;
> +	mm_segment_t fs;
>  	unsigned int fault;
>  	u16 tinstr = 0;
>  	int isize = 4;
> @@ -784,16 +785,22 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
>  
>  	instrptr = instruction_pointer(regs);
>  
> +	fs = get_fs();
> +	set_fs(KERNEL_DS);
>  	if (thumb_mode(regs)) {
>  		u16 *ptr = (u16 *)(instrptr & ~1);
> -		fault = probe_kernel_address(ptr, tinstr);
> +		fault = __copy_from_user(&tinstr,
> +				(__force const void __user *)ptr,
> +				sizeof(tinstr));
>  		tinstr = __mem_to_opcode_thumb16(tinstr);
>  		if (!fault) {
>  			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
>  			    IS_T32(tinstr)) {
>  				/* Thumb-2 32-bit */
>  				u16 tinst2 = 0;
> -				fault = probe_kernel_address(ptr + 1, tinst2);
> +				fault = __copy_from_user(&tinst2,
> +						(__force const void __user *)(ptr+1),
> +						sizeof(tinst2));
>  				tinst2 = __mem_to_opcode_thumb16(tinst2);
>  				instr = __opcode_thumb32_compose(tinstr, tinst2);
>  				thumb2_32b = 1;
> @@ -803,10 +810,13 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
>  			}
>  		}
>  	} else {
> -		fault = probe_kernel_address((void *)instrptr, instr);
> +		fault = __copy_from_user(&instr,
> +				(__force const void __user *)instrptr,
> +				sizeof(instr));
>  		instr = __mem_to_opcode_arm(instr);
>  	}
>  
> +	set_fs(fs);
>  	if (fault) {
>  		type = TYPE_FAULT;
>  		goto bad_or_fault;
> -- 
> 1.8.3.1
> 
>
Russell King (Oracle) Aug. 30, 2019, 1:48 p.m. UTC | #2
Please fix your email.

  jingxiangfeng@huawei.com
      host mx7.huawei.com [168.195.93.46]
      SMTP error from remote mail server after pipelined DATA:
      554 5.7.1 spf check result is none

SPF is *not* required for email.

If you wish to impose such restrictions on email, then I reserve the
right to ignore your patches until this issue is resolved! ;)

On Fri, Aug 30, 2019 at 02:35:22PM +0100, Russell King - ARM Linux admin wrote:
> On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
> > The function do_alignment can handle misaligned address for user and
> > kernel space. If it is a userspace access, do_alignment may fail on
> > a low-memory situation, because page faults are disabled in
> > probe_kernel_address.
> > 
> > Fix this by using __copy_from_user instead of probe_kernel_address.
> > 
> > Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
> > Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
> 
> NAK.
> 
> The "scheduling while atomic warning in alignment handling code" is
> caused by fixing up the page fault while trying to handle the
> mis-alignment fault generated from an instruction in atomic context.
> 
> Your patch re-introduces that bug.
> 
> > ---
> >  arch/arm/mm/alignment.c | 16 +++++++++++++---
> >  1 file changed, 13 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
> > index 04b3643..2ccabd3 100644
> > --- a/arch/arm/mm/alignment.c
> > +++ b/arch/arm/mm/alignment.c
> > @@ -774,6 +774,7 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
> >  	unsigned long instr = 0, instrptr;
> >  	int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
> >  	unsigned int type;
> > +	mm_segment_t fs;
> >  	unsigned int fault;
> >  	u16 tinstr = 0;
> >  	int isize = 4;
> > @@ -784,16 +785,22 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
> >  
> >  	instrptr = instruction_pointer(regs);
> >  
> > +	fs = get_fs();
> > +	set_fs(KERNEL_DS);
> >  	if (thumb_mode(regs)) {
> >  		u16 *ptr = (u16 *)(instrptr & ~1);
> > -		fault = probe_kernel_address(ptr, tinstr);
> > +		fault = __copy_from_user(&tinstr,
> > +				(__force const void __user *)ptr,
> > +				sizeof(tinstr));
> >  		tinstr = __mem_to_opcode_thumb16(tinstr);
> >  		if (!fault) {
> >  			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
> >  			    IS_T32(tinstr)) {
> >  				/* Thumb-2 32-bit */
> >  				u16 tinst2 = 0;
> > -				fault = probe_kernel_address(ptr + 1, tinst2);
> > +				fault = __copy_from_user(&tinst2,
> > +						(__force const void __user *)(ptr+1),
> > +						sizeof(tinst2));
> >  				tinst2 = __mem_to_opcode_thumb16(tinst2);
> >  				instr = __opcode_thumb32_compose(tinstr, tinst2);
> >  				thumb2_32b = 1;
> > @@ -803,10 +810,13 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
> >  			}
> >  		}
> >  	} else {
> > -		fault = probe_kernel_address((void *)instrptr, instr);
> > +		fault = __copy_from_user(&instr,
> > +				(__force const void __user *)instrptr,
> > +				sizeof(instr));
> >  		instr = __mem_to_opcode_arm(instr);
> >  	}
> >  
> > +	set_fs(fs);
> >  	if (fault) {
> >  		type = TYPE_FAULT;
> >  		goto bad_or_fault;
> > -- 
> > 1.8.3.1
> > 
> > 
> 
> -- 
> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
> According to speedtest.net: 11.9Mbps down 500kbps up
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
Eric W. Biederman Aug. 30, 2019, 7:45 p.m. UTC | #3
Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:

> On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
>> The function do_alignment can handle misaligned address for user and
>> kernel space. If it is a userspace access, do_alignment may fail on
>> a low-memory situation, because page faults are disabled in
>> probe_kernel_address.
>> 
>> Fix this by using __copy_from_user instead of probe_kernel_address.
>> 
>> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
>> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
>
> NAK.
>
> The "scheduling while atomic warning in alignment handling code" is
> caused by fixing up the page fault while trying to handle the
> mis-alignment fault generated from an instruction in atomic context.
>
> Your patch re-introduces that bug.

And the patch that fixed scheduling while atomic apparently introduced a
regression.  Admittedly a regression that took 6 years to track down but
still.

So it looks like the code needs to do something like:

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 04b36436cbc0..5e2b8623851e 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -784,6 +784,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
 	instrptr = instruction_pointer(regs);
 
+	if (user_mode(regs))
+		goto user;
+
 	if (thumb_mode(regs)) {
 		u16 *ptr = (u16 *)(instrptr & ~1);
 		fault = probe_kernel_address(ptr, tinstr);
@@ -933,6 +936,34 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	return 1;
 
  user:
+	if (thumb_mode(regs)) {
+		u16 *ptr = (u16 *)(instrptr & ~1);
+		fault = get_user(tinstr, ptr);
+		tinstr = __mem_to_opcode_thumb16(tinstr);
+		if (!fault) {
+			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
+			    IS_T32(tinstr)) {
+				/* Thumb-2 32-bit */
+				u16 tinst2 = 0;
+				fault = get_user(tinst2, ptr + 1);
+				tinst2 = __mem_to_opcode_thumb16(tinst2);
+				instr = __opcode_thumb32_compose(tinstr, tinst2);
+				thumb2_32b = 1;
+			} else {
+				isize = 2;
+				instr = thumb2arm(tinstr);
+			}
+		}
+	} else {
+		fault = get_user(instr, (u32*)instrptr);
+		instr = __mem_to_opcode_arm(instr);
+	}
+
+	if (fault) {
+		type = TYPE_FAULT;
+		goto bad_or_fault;
+	}
+
 	ai_user += 1;
 
 	if (ai_usermode & UM_WARN)

Eric
Russell King (Oracle) Aug. 30, 2019, 8:30 p.m. UTC | #4
On Fri, Aug 30, 2019 at 02:45:36PM -0500, Eric W. Biederman wrote:
> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> 
> > On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
> >> The function do_alignment can handle misaligned address for user and
> >> kernel space. If it is a userspace access, do_alignment may fail on
> >> a low-memory situation, because page faults are disabled in
> >> probe_kernel_address.
> >> 
> >> Fix this by using __copy_from_user instead of probe_kernel_address.
> >> 
> >> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
> >> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
> >
> > NAK.
> >
> > The "scheduling while atomic warning in alignment handling code" is
> > caused by fixing up the page fault while trying to handle the
> > mis-alignment fault generated from an instruction in atomic context.
> >
> > Your patch re-introduces that bug.
> 
> And the patch that fixed scheduling while atomic apparently introduced a
> regression.  Admittedly a regression that took 6 years to track down but
> still.

Right, and given the number of years, we are trading one regression for
a different regression.  If we revert to the original code where we
fix up, we will end up with people complaining about a "new" regression
caused by reverting the previous fix.  Follow this policy and we just
end up constantly reverting the previous revert.

The window is very small - the page in question will have had to have
instructions read from it immediately prior to the handler being entered,
and would have had to be made "old" before subsequently being unmapped.

Rather than excessively complicating the code and making it even more
inefficient (as in your patch), we could instead retry executing the
instruction when we discover that the page is unavailable, which should
cause the page to be paged back in.

If the page really is unavailable, the prefetch abort should cause a
SEGV to be raised, otherwise the re-execution should replace the page.

The danger to that approach is we page it back in, and it gets paged
back out before we're able to read the instruction indefinitely.
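
A hypothetical sketch of that retry approach (assumption: returning zero from
do_alignment() without emulating the access resumes execution at the faulting
instruction; nothing like this was actually posted in the thread):

	if (fault) {
		if (user_mode(regs))
			return 0;	/* retry: the instruction re-fetch
					 * either pages the text back in or
					 * takes a prefetch abort -> SEGV */
		type = TYPE_FAULT;
		goto bad_or_fault;
	}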

However, as it's impossible for me to contact the submitter, anything
I do will be poking about in the dark and without any way to validate
that it does fix the problem, so I think apart from reviewing of any
patches, there's not much I can do.
Eric W. Biederman Aug. 30, 2019, 9:02 p.m. UTC | #5
Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:

> On Fri, Aug 30, 2019 at 02:45:36PM -0500, Eric W. Biederman wrote:
>> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
>> 
>> > On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
>> >> The function do_alignment can handle misaligned address for user and
>> >> kernel space. If it is a userspace access, do_alignment may fail on
>> >> a low-memory situation, because page faults are disabled in
>> >> probe_kernel_address.
>> >> 
>> >> Fix this by using __copy_from_user instead of probe_kernel_address.
>> >> 
>> >> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
>> >> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
>> >
>> > NAK.
>> >
>> > The "scheduling while atomic warning in alignment handling code" is
>> > caused by fixing up the page fault while trying to handle the
>> > mis-alignment fault generated from an instruction in atomic context.
>> >
>> > Your patch re-introduces that bug.
>> 
>> And the patch that fixed scheduling while atomic apparently introduced a
>> regression.  Admittedly a regression that took 6 years to track down but
>> still.
>
> Right, and given the number of years, we are trading one regression for
> a different regression.  If we revert to the original code where we
> fix up, we will end up with people complaining about a "new" regression
> caused by reverting the previous fix.  Follow this policy and we just
> end up constantly reverting the previous revert.
>
> The window is very small - the page in question will have had to have
> instructions read from it immediately prior to the handler being entered,
> and would have had to be made "old" before subsequently being unmapped.

> Rather than excessively complicating the code and making it even more
> inefficient (as in your patch), we could instead retry executing the
> instruction when we discover that the page is unavailable, which should
> cause the page to be paged back in.

My patch does not introduce any inefficiencies.  It only moves the
check for user_mode up a bit.  My patch did duplicate the code.

> If the page really is unavailable, the prefetch abort should cause a
> SEGV to be raised, otherwise the re-execution should replace the page.
>
> The danger to that approach is we page it back in, and it gets paged
> back out before we're able to read the instruction indefinitely.

I would think either a little code duplication or a function that looks
at user_mode(regs) and picks the appropriate kind of copy to do would be
the best way to go.  Because what needs to happen in the two cases for
reading the instruction are almost completely different.

> However, as it's impossible for me to contact the submitter, anything
> I do will be poking about in the dark and without any way to validate
> that it does fix the problem, so I think apart from reviewing of any
> patches, there's not much I can do.

I didn't realize your emails to him were bouncing.  That is odd.  Mine
don't appear to be.

Eric
Russell King (Oracle) Aug. 30, 2019, 10:29 p.m. UTC | #6
On Fri, Aug 30, 2019 at 04:02:48PM -0500, Eric W. Biederman wrote:
> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> 
> > On Fri, Aug 30, 2019 at 02:45:36PM -0500, Eric W. Biederman wrote:
> >> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> >> 
> >> > On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
> >> >> The function do_alignment can handle misaligned address for user and
> >> >> kernel space. If it is a userspace access, do_alignment may fail on
> >> >> a low-memory situation, because page faults are disabled in
> >> >> probe_kernel_address.
> >> >> 
> >> >> Fix this by using __copy_from_user instead of probe_kernel_address.
> >> >> 
> >> >> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
> >> >> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
> >> >
> >> > NAK.
> >> >
> >> > The "scheduling while atomic warning in alignment handling code" is
> >> > caused by fixing up the page fault while trying to handle the
> >> > mis-alignment fault generated from an instruction in atomic context.
> >> >
> >> > Your patch re-introduces that bug.
> >> 
> >> And the patch that fixed scheduling while atomic apparently introduced a
> >> regression.  Admittedly a regression that took 6 years to track down but
> >> still.
> >
> > Right, and given the number of years, we are trading one regression for
> > a different regression.  If we revert to the original code where we
> > fix up, we will end up with people complaining about a "new" regression
> > caused by reverting the previous fix.  Follow this policy and we just
> > end up constantly reverting the previous revert.
> >
> > The window is very small - the page in question will have had to have
> > instructions read from it immediately prior to the handler being entered,
> > and would have had to be made "old" before subsequently being unmapped.
> 
> > Rather than excessively complicating the code and making it even more
> > inefficient (as in your patch), we could instead retry executing the
> > instruction when we discover that the page is unavailable, which should
> > cause the page to be paged back in.
> 
> My patch does not introduce any inefficiencies.  It only moves the
> check for user_mode up a bit.  My patch did duplicate the code.
> 
> > If the page really is unavailable, the prefetch abort should cause a
> > SEGV to be raised, otherwise the re-execution should replace the page.
> >
> > The danger to that approach is we page it back in, and it gets paged
> > back out before we're able to read the instruction indefinitely.
> 
> I would think either a little code duplication or a function that looks
> at user_mode(regs) and picks the appropriate kind of copy to do would be
> the best way to go.  Because what needs to happen in the two cases for
> reading the instruction are almost completely different.

That is what I mean.  I'd prefer to avoid that with the large chunk of
code.  How about instead adding a local replacement for
probe_kernel_address() that just sorts out the reading, rather than
duplicating all the code to deal with thumb fixup. 

> > However, as it's impossible for me to contact the submitter, anything
> > I do will be poking about in the dark and without any way to validate
> > that it does fix the problem, so I think apart from reviewing of any
> > patches, there's not much I can do.
> 
> I didn't realize your emails to him were bouncing.  That is odd.  Mine
> don't appear to be.

Hmm, so the fact I posted publicly in reply to my reply with the MTA
bounce message didn't give you a clue?
Jing Xiangfeng Aug. 31, 2019, 1:49 a.m. UTC | #7
On 2019/8/30 21:35, Russell King - ARM Linux admin wrote:
> On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
>> The function do_alignment can handle misaligned address for user and
>> kernel space. If it is a userspace access, do_alignment may fail on
>> a low-memory situation, because page faults are disabled in
>> probe_kernel_address.
>>
>> Fix this by using __copy_from_user instead of probe_kernel_address.
>>
>> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
>> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
> 
> NAK.
> 
> The "scheduling while atomic warning in alignment handling code" is
> caused by fixing up the page fault while trying to handle the
> mis-alignment fault generated from an instruction in atomic context.

__might_sleep is called in the function __get_user, which led to that bug.
And that bug is triggered in kernel space, where a page fault cannot be generated.
Right?

> Your patch re-introduces that bug.
> 
>> ---
>>  arch/arm/mm/alignment.c | 16 +++++++++++++---
>>  1 file changed, 13 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
>> index 04b3643..2ccabd3 100644
>> --- a/arch/arm/mm/alignment.c
>> +++ b/arch/arm/mm/alignment.c
>> @@ -774,6 +774,7 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
>>  	unsigned long instr = 0, instrptr;
>>  	int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
>>  	unsigned int type;
>> +	mm_segment_t fs;
>>  	unsigned int fault;
>>  	u16 tinstr = 0;
>>  	int isize = 4;
>> @@ -784,16 +785,22 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
>>  
>>  	instrptr = instruction_pointer(regs);
>>  
>> +	fs = get_fs();
>> +	set_fs(KERNEL_DS);
>>  	if (thumb_mode(regs)) {
>>  		u16 *ptr = (u16 *)(instrptr & ~1);
>> -		fault = probe_kernel_address(ptr, tinstr);
>> +		fault = __copy_from_user(&tinstr,
>> +				(__force const void __user *)ptr,
>> +				sizeof(tinstr));
>>  		tinstr = __mem_to_opcode_thumb16(tinstr);
>>  		if (!fault) {
>>  			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
>>  			    IS_T32(tinstr)) {
>>  				/* Thumb-2 32-bit */
>>  				u16 tinst2 = 0;
>> -				fault = probe_kernel_address(ptr + 1, tinst2);
>> +				fault = __copy_from_user(&tinst2,
>> +						(__force const void __user *)(ptr+1),
>> +						sizeof(tinst2));
>>  				tinst2 = __mem_to_opcode_thumb16(tinst2);
>>  				instr = __opcode_thumb32_compose(tinstr, tinst2);
>>  				thumb2_32b = 1;
>> @@ -803,10 +810,13 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
>>  			}
>>  		}
>>  	} else {
>> -		fault = probe_kernel_address((void *)instrptr, instr);
>> +		fault = __copy_from_user(&instr,
>> +				(__force const void __user *)instrptr,
>> +				sizeof(instr));
>>  		instr = __mem_to_opcode_arm(instr);
>>  	}
>>  
>> +	set_fs(fs);
>>  	if (fault) {
>>  		type = TYPE_FAULT;
>>  		goto bad_or_fault;
>> -- 
>> 1.8.3.1
>>
>>
>
Russell King (Oracle) Aug. 31, 2019, 7:55 a.m. UTC | #8
On Sat, Aug 31, 2019 at 09:49:45AM +0800, Jing Xiangfeng wrote:
> On 2019/8/30 21:35, Russell King - ARM Linux admin wrote:
> > On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
> >> The function do_alignment can handle misaligned address for user and
> >> kernel space. If it is a userspace access, do_alignment may fail on
> >> a low-memory situation, because page faults are disabled in
> >> probe_kernel_address.
> >>
> >> Fix this by using __copy_from_user instead of probe_kernel_address.
> >>
> >> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
> >> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
> > 
> > NAK.
> > 
> > The "scheduling while atomic warning in alignment handling code" is
> > caused by fixing up the page fault while trying to handle the
> > mis-alignment fault generated from an instruction in atomic context.
> 
> __might_sleep is called in the function __get_user, which led to that bug.
> And that bug is triggered in kernel space, where a page fault cannot be generated.
> Right?

Your email is now fixed?

All of get_user(), __get_user(), copy_from_user() and __copy_from_user()
_can_ cause a page fault, which might need to fetch the page from disk.
All these four functions are equivalent as far as that goes - and indeed
as are their versions that write as well.

If the page needs to come from disk, all of these functions _will_
sleep.  If they are called from an atomic context, and the page fault
handler needs to fetch data from disk, they will attempt to sleep,
which will issue a warning.
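
In code terms, the distinction with probe_kernel_address() is that only the
pagefault-disabled path is safe in atomic context, and only the sleeping path
can recover a reclaimed user page; a minimal sketch of the contrast (the helper
name here is made up):

static int read_instr_u32(const u32 __user *ptr, u32 *val, bool may_sleep)
{
	if (may_sleep)
		return get_user(*val, ptr);	/* may fault and sleep on disk I/O */

	/* pagefault_disable()d path: a non-present page is -EFAULT, never I/O */
	return probe_kernel_address((__force const void *)ptr, *val);
}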
Jing Xiangfeng Aug. 31, 2019, 9:16 a.m. UTC | #9
On 2019/8/31 15:55, Russell King - ARM Linux admin wrote:
> On Sat, Aug 31, 2019 at 09:49:45AM +0800, Jing Xiangfeng wrote:
>> On 2019/8/30 21:35, Russell King - ARM Linux admin wrote:
>>> On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
>>>> The function do_alignment can handle misaligned address for user and
>>>> kernel space. If it is a userspace access, do_alignment may fail on
>>>> a low-memory situation, because page faults are disabled in
>>>> probe_kernel_address.
>>>>
>>>> Fix this by using __copy_from_user instead of probe_kernel_address.
>>>>
>>>> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
>>>> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
>>>
>>> NAK.
>>>
>>> The "scheduling while atomic warning in alignment handling code" is
>>> caused by fixing up the page fault while trying to handle the
>>> mis-alignment fault generated from an instruction in atomic context.
>>
>> __might_sleep is called in the function __get_user, which led to that bug.
>> And that bug is triggered in kernel space, where a page fault cannot be generated.
>> Right?
> 
> Your email is now fixed?

Yeah, I just checked the mailbox, it is normal now.

> 
> All of get_user(), __get_user(), copy_from_user() and __copy_from_user()
> _can_ cause a page fault, which might need to fetch the page from disk.
> All these four functions are equivalent as far as that goes - and indeed
> as are their versions that write as well.
> 
> If the page needs to come from disk, all of these functions _will_
> sleep.  If they are called from an atomic context, and the page fault
> handler needs to fetch data from disk, they will attempt to sleep,
> which will issue a warning.
> 
I understand.

	Thanks
Eric W. Biederman Sept. 2, 2019, 5:36 p.m. UTC | #10
Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:

> On Fri, Aug 30, 2019 at 04:02:48PM -0500, Eric W. Biederman wrote:
>> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
>> 
>> > On Fri, Aug 30, 2019 at 02:45:36PM -0500, Eric W. Biederman wrote:
>> >> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
>> >> 
>> >> > On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
>> >> >> The function do_alignment can handle misaligned address for user and
>> >> >> kernel space. If it is a userspace access, do_alignment may fail on
>> >> >> a low-memory situation, because page faults are disabled in
>> >> >> probe_kernel_address.
>> >> >> 
>> >> >> Fix this by using __copy_from_user instead of probe_kernel_address.
>> >> >> 
>> >> >> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
>> >> >> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
>> >> >
>> >> > NAK.
>> >> >
>> >> > The "scheduling while atomic warning in alignment handling code" is
>> >> > caused by fixing up the page fault while trying to handle the
>> >> > mis-alignment fault generated from an instruction in atomic context.
>> >> >
>> >> > Your patch re-introduces that bug.
>> >> 
>> >> And the patch that fixed scheduling while atomic apparently introduced a
>> >> regression.  Admittedly a regression that took 6 years to track down but
>> >> still.
>> >
>> > Right, and given the number of years, we are trading one regression for
>> > a different regression.  If we revert to the original code where we
>> > fix up, we will end up with people complaining about a "new" regression
>> > caused by reverting the previous fix.  Follow this policy and we just
>> > end up constantly reverting the previous revert.
>> >
>> > The window is very small - the page in question will have had to have
>> > instructions read from it immediately prior to the handler being entered,
>> > and would have had to be made "old" before subsequently being unmapped.
>> 
>> > Rather than excessively complicating the code and making it even more
>> > inefficient (as in your patch), we could instead retry executing the
>> > instruction when we discover that the page is unavailable, which should
>> > cause the page to be paged back in.
>> 
>> My patch does not introduce any inefficiencies.  It only moves the
>> check for user_mode up a bit.  My patch did duplicate the code.
>> 
>> > If the page really is unavailable, the prefetch abort should cause a
>> > SEGV to be raised, otherwise the re-execution should replace the page.
>> >
>> > The danger to that approach is we page it back in, and it gets paged
>> > back out before we're able to read the instruction indefinitely.
>> 
>> I would think either a little code duplication or a function that looks
>> at user_mode(regs) and picks the appropriate kind of copy to do would be
>> the best way to go.  Because what needs to happen in the two cases for
>> reading the instruction are almost completely different.
>
> That is what I mean.  I'd prefer to avoid that with the large chunk of
> code.  How about instead adding a local replacement for
> probe_kernel_address() that just sorts out the reading, rather than
> duplicating all the code to deal with thumb fixup.

So something like this should be fine?

Jing Xiangfeng can you test this please?  I think this fixes your issue
but I don't currently have an arm development box where I could test this.

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 04b36436cbc0..b07d17ca0ae5 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -767,6 +767,23 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
 	return NULL;
 }
 
+static inline unsigned long
+copy_instr(bool umode, void *dst, unsigned long instrptr, size_t size)
+{
+	unsigned long result;
+	if (umode) {
+		void __user *src = (void *)instrptr;
+		result = copy_from_user(dst, src, size);
+	} else {
+		void *src = (void *)instrptr;
+		result = probe_kernel_read(dst, src, size);
+	}
+	/* Convert short reads into -EFAULT */
+	if ((result >= 0) && (result < size))
+		result = -EFAULT;
+	return result;
+}
+
 static int
 do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
@@ -778,22 +795,24 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	u16 tinstr = 0;
 	int isize = 4;
 	int thumb2_32b = 0;
+	bool umode;
 
 	if (interrupts_enabled(regs))
 		local_irq_enable();
 
 	instrptr = instruction_pointer(regs);
+	umode = user_mode(regs);
 
 	if (thumb_mode(regs)) {
-		u16 *ptr = (u16 *)(instrptr & ~1);
-		fault = probe_kernel_address(ptr, tinstr);
+		unsigned long tinstrptr = instrptr & ~1;
+		fault = copy_instr(umode, &tinstr, tinstrptr, 2);
 		tinstr = __mem_to_opcode_thumb16(tinstr);
 		if (!fault) {
 			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
 			    IS_T32(tinstr)) {
 				/* Thumb-2 32-bit */
 				u16 tinst2 = 0;
-				fault = probe_kernel_address(ptr + 1, tinst2);
+				fault = copy_instr(umode, &tinst2, tinstrptr + 2, 2);
 				tinst2 = __mem_to_opcode_thumb16(tinst2);
 				instr = __opcode_thumb32_compose(tinstr, tinst2);
 				thumb2_32b = 1;
@@ -803,7 +822,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 			}
 		}
 	} else {
-		fault = probe_kernel_address((void *)instrptr, instr);
+		fault = copy_instr(umode, &instr, instrptr, 4);
 		instr = __mem_to_opcode_arm(instr);
 	}
 
@@ -812,7 +831,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 		goto bad_or_fault;
 	}
 
-	if (user_mode(regs))
+	if (umode)
 		goto user;
 
 	ai_sys += 1;
Jing Xiangfeng Sept. 4, 2019, 2:17 a.m. UTC | #11
On 2019/9/3 1:36, Eric W. Biederman wrote:
> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> 
>> On Fri, Aug 30, 2019 at 04:02:48PM -0500, Eric W. Biederman wrote:
>>> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
>>>
>>>> On Fri, Aug 30, 2019 at 02:45:36PM -0500, Eric W. Biederman wrote:
>>>>> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
>>>>>
>>>>>> On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
>>>>>>> The function do_alignment can handle misaligned address for user and
>>>>>>> kernel space. If it is a userspace access, do_alignment may fail on
>>>>>>> a low-memory situation, because page faults are disabled in
>>>>>>> probe_kernel_address.
>>>>>>>
>>>>>>> Fix this by using __copy_from_user instead of probe_kernel_address.
>>>>>>>
>>>>>>> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
>>>>>>> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
>>>>>>
>>>>>> NAK.
>>>>>>
>>>>>> The "scheduling while atomic warning in alignment handling code" is
>>>>>> caused by fixing up the page fault while trying to handle the
>>>>>> mis-alignment fault generated from an instruction in atomic context.
>>>>>>
>>>>>> Your patch re-introduces that bug.
>>>>>
>>>>> And the patch that fixed scheduling while atomic apparently introduced a
>>>>> regression.  Admittedly a regression that took 6 years to track down but
>>>>> still.
>>>>
>>>> Right, and given the number of years, we are trading one regression for
>>>> a different regression.  If we revert to the original code where we
>>>> fix up, we will end up with people complaining about a "new" regression
>>>> caused by reverting the previous fix.  Follow this policy and we just
>>>> end up constantly reverting the previous revert.
>>>>
>>>> The window is very small - the page in question will have had to have
>>>> instructions read from it immediately prior to the handler being entered,
>>>> and would have had to be made "old" before subsequently being unmapped.
>>>
>>>> Rather than excessively complicating the code and making it even more
>>>> inefficient (as in your patch), we could instead retry executing the
>>>> instruction when we discover that the page is unavailable, which should
>>>> cause the page to be paged back in.
>>>
>>> My patch does not introduce any inefficiencies.  It only moves the
>>> check for user_mode up a bit.  My patch did duplicate the code.
>>>
>>>> If the page really is unavailable, the prefetch abort should cause a
>>>> SEGV to be raised, otherwise the re-execution should replace the page.
>>>>
>>>> The danger to that approach is we page it back in, and it gets paged
>>>> back out before we're able to read the instruction indefinitely.
>>>
>>> I would think either a little code duplication or a function that looks
>>> at user_mode(regs) and picks the appropriate kind of copy to do would be
>>> the best way to go.  Because what needs to happen in the two cases for
>>> reading the instruction are almost completely different.
>>
>> That is what I mean.  I'd prefer to avoid that with the large chunk of
>> code.  How about instead adding a local replacement for
>> probe_kernel_address() that just sorts out the reading, rather than
>> duplicating all the code to deal with thumb fixup.
> 
> So something like this should be fine?
> 
> Jing Xiangfeng can you test this please?  I think this fixes your issue
> but I don't currently have an arm development box where I could test this.
> 
Yes, I have tested it, and it fixes my issue on kernel 4.19.

> diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
> index 04b36436cbc0..b07d17ca0ae5 100644
> --- a/arch/arm/mm/alignment.c
> +++ b/arch/arm/mm/alignment.c
> @@ -767,6 +767,23 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
>  	return NULL;
>  }
>  
> +static inline unsigned long
> +copy_instr(bool umode, void *dst, unsigned long instrptr, size_t size)
> +{
> +	unsigned long result;
> +	if (umode) {
> +		void __user *src = (void *)instrptr;
> +		result = copy_from_user(dst, src, size);
> +	} else {
> +		void *src = (void *)instrptr;
> +		result = probe_kernel_read(dst, src, size);
> +	}
> +	/* Convert short reads into -EFAULT */
> +	if ((result >= 0) && (result < size))
> +		result = -EFAULT;
> +	return result;
> +}
> +
>  static int
>  do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>  {
> @@ -778,22 +795,24 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>  	u16 tinstr = 0;
>  	int isize = 4;
>  	int thumb2_32b = 0;
> +	bool umode;
>  
>  	if (interrupts_enabled(regs))
>  		local_irq_enable();
>  
>  	instrptr = instruction_pointer(regs);
> +	umode = user_mode(regs);
>  
>  	if (thumb_mode(regs)) {
> -		u16 *ptr = (u16 *)(instrptr & ~1);
> -		fault = probe_kernel_address(ptr, tinstr);
> +		unsigned long tinstrptr = instrptr & ~1;
> +		fault = copy_instr(umode, &tinstr, tinstrptr, 2);
>  		tinstr = __mem_to_opcode_thumb16(tinstr);
>  		if (!fault) {
>  			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
>  			    IS_T32(tinstr)) {
>  				/* Thumb-2 32-bit */
>  				u16 tinst2 = 0;
> -				fault = probe_kernel_address(ptr + 1, tinst2);
> +				fault = copy_instr(umode, &tinst2, tinstrptr + 2, 2);
>  				tinst2 = __mem_to_opcode_thumb16(tinst2);
>  				instr = __opcode_thumb32_compose(tinstr, tinst2);
>  				thumb2_32b = 1;
> @@ -803,7 +822,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>  			}
>  		}
>  	} else {
> -		fault = probe_kernel_address((void *)instrptr, instr);
> +		fault = copy_instr(umode, &instr, instrptr, 4);
>  		instr = __mem_to_opcode_arm(instr);
>  	}
>  
> @@ -812,7 +831,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>  		goto bad_or_fault;
>  	}
>  
> -	if (user_mode(regs))
> +	if (umode)
>  		goto user;
>  
>  	ai_sys += 1;
> 
> .
>
Russell King (Oracle) Sept. 6, 2019, 3:17 p.m. UTC | #12
On Mon, Sep 02, 2019 at 12:36:56PM -0500, Eric W. Biederman wrote:
> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> 
> > On Fri, Aug 30, 2019 at 04:02:48PM -0500, Eric W. Biederman wrote:
> >> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> >> 
> >> > On Fri, Aug 30, 2019 at 02:45:36PM -0500, Eric W. Biederman wrote:
> >> >> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> >> >> 
> >> >> > On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
> >> >> >> The function do_alignment can handle misaligned address for user and
> >> >> >> kernel space. If it is a userspace access, do_alignment may fail on
> >> >> >> a low-memory situation, because page faults are disabled in
> >> >> >> probe_kernel_address.
> >> >> >> 
> >> >> >> Fix this by using __copy_from_user instead of probe_kernel_address.
> >> >> >> 
> >> >> >> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
> >> >> >> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
> >> >> >
> >> >> > NAK.
> >> >> >
> >> >> > The "scheduling while atomic warning in alignment handling code" is
> >> >> > caused by fixing up the page fault while trying to handle the
> >> >> > mis-alignment fault generated from an instruction in atomic context.
> >> >> >
> >> >> > Your patch re-introduces that bug.
> >> >> 
> >> >> And the patch that fixed scheduling while atomic apparently introduced a
> >> >> regression.  Admittedly a regression that took 6 years to track down but
> >> >> still.
> >> >
> >> > Right, and given the number of years, we are trading one regression for
> >> > a different regression.  If we revert to the original code where we
> >> > fix up, we will end up with people complaining about a "new" regression
> >> > caused by reverting the previous fix.  Follow this policy and we just
> >> > end up constantly reverting the previous revert.
> >> >
> >> > The window is very small - the page in question will have had to have
> >> > instructions read from it immediately prior to the handler being entered,
> >> > and would have had to be made "old" before subsequently being unmapped.
> >> 
> >> > Rather than excessively complicating the code and making it even more
> >> > inefficient (as in your patch), we could instead retry executing the
> >> > instruction when we discover that the page is unavailable, which should
> >> > cause the page to be paged back in.
> >> 
> >> My patch does not introduce any inefficiencies.  It only moves the
> >> check for user_mode up a bit.  My patch did duplicate the code.
> >> 
> >> > If the page really is unavailable, the prefetch abort should cause a
> >> > SEGV to be raised, otherwise the re-execution should replace the page.
> >> >
> >> > The danger to that approach is we page it back in, and it gets paged
> >> > back out before we're able to read the instruction indefinitely.
> >> 
> >> I would think either a little code duplication or a function that looks
> >> at user_mode(regs) and picks the appropriate kind of copy to do would be
> >> the best way to go.  Because what needs to happen in the two cases for
> >> reading the instruction are almost completely different.
> >
> > That is what I mean.  I'd prefer to avoid that with the large chunk of
> > code.  How about instead adding a local replacement for
> > probe_kernel_address() that just sorts out the reading, rather than
> > duplicating all the code to deal with thumb fixup.
> 
> So something like this should be fine?
> 
> Jing Xiangfeng can you test this please?  I think this fixes your issue
> but I don't currently have an arm development box where I could test this.

Sorry, only just got around to this again.  What I came up with is this:

8<===
From: Russell King <rmk+kernel@armlinux.org.uk>
Subject: [PATCH] ARM: mm: fix alignment

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/alignment.c | 44 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 6067fa4de22b..529f54d94709 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -765,6 +765,36 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
 	return NULL;
 }
 
+static int alignment_get_arm(struct pt_regs *regs, u32 *ip, unsigned long *inst)
+{
+	u32 instr = 0;
+	int fault;
+
+	if (user_mode(regs))
+		fault = get_user(instr, ip);
+	else
+		fault = probe_kernel_address(ip, instr);
+
+	*inst = __mem_to_opcode_arm(instr);
+
+	return fault;
+}
+
+static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst)
+{
+	u16 instr = 0;
+	int fault;
+
+	if (user_mode(regs))
+		fault = get_user(instr, ip);
+	else
+		fault = probe_kernel_address(ip, instr);
+
+	*inst = __mem_to_opcode_thumb16(instr);
+
+	return fault;
+}
+
 static int
 do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
@@ -772,10 +802,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	unsigned long instr = 0, instrptr;
 	int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
 	unsigned int type;
-	unsigned int fault;
 	u16 tinstr = 0;
 	int isize = 4;
 	int thumb2_32b = 0;
+	int fault;
 
 	if (interrupts_enabled(regs))
 		local_irq_enable();
@@ -784,15 +814,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
 	if (thumb_mode(regs)) {
 		u16 *ptr = (u16 *)(instrptr & ~1);
-		fault = probe_kernel_address(ptr, tinstr);
-		tinstr = __mem_to_opcode_thumb16(tinstr);
+
+		fault = alignment_get_thumb(regs, ptr, &tinstr);
 		if (!fault) {
 			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
 			    IS_T32(tinstr)) {
 				/* Thumb-2 32-bit */
-				u16 tinst2 = 0;
-				fault = probe_kernel_address(ptr + 1, tinst2);
-				tinst2 = __mem_to_opcode_thumb16(tinst2);
+				u16 tinst2;
+				fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
 				instr = __opcode_thumb32_compose(tinstr, tinst2);
 				thumb2_32b = 1;
 			} else {
@@ -801,8 +830,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 			}
 		}
 	} else {
-		fault = probe_kernel_address((void *)instrptr, instr);
-		instr = __mem_to_opcode_arm(instr);
+		fault = alignment_get_arm(regs, (void *)instrptr, &instr);
 	}
 
 	if (fault) {
Russell King (Oracle) Sept. 15, 2019, 6:34 p.m. UTC | #13
On Fri, Sep 06, 2019 at 04:17:59PM +0100, Russell King - ARM Linux admin wrote:
> On Mon, Sep 02, 2019 at 12:36:56PM -0500, Eric W. Biederman wrote:
> > Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> > 
> > > On Fri, Aug 30, 2019 at 04:02:48PM -0500, Eric W. Biederman wrote:
> > >> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> > >> 
> > >> > On Fri, Aug 30, 2019 at 02:45:36PM -0500, Eric W. Biederman wrote:
> > >> >> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
> > >> >> 
> > >> >> > On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
> > >> >> >> The function do_alignment can handle misaligned address for user and
> > >> >> >> kernel space. If it is a userspace access, do_alignment may fail on
> > >> >> >> a low-memory situation, because page faults are disabled in
> > >> >> >> probe_kernel_address.
> > >> >> >> 
> > >> >> >> Fix this by using __copy_from_user instead of probe_kernel_address.
> > >> >> >> 
> > >> >> >> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
> > >> >> >> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
> > >> >> >
> > >> >> > NAK.
> > >> >> >
> > >> >> > The "scheduling while atomic warning in alignment handling code" is
> > >> >> > caused by fixing up the page fault while trying to handle the
> > >> >> > mis-alignment fault generated from an instruction in atomic context.
> > >> >> >
> > >> >> > Your patch re-introduces that bug.
> > >> >> 
> > >> >> And the patch that fixed scheduling while atomic apparently introduced a
> > >> >> regression.  Admittedly a regression that took 6 years to track down but
> > >> >> still.
> > >> >
> > >> > Right, and given the number of years, we are trading one regression for
> > >> > a different regression.  If we revert to the original code where we
> > >> > fix up, we will end up with people complaining about a "new" regression
> > >> > caused by reverting the previous fix.  Follow this policy and we just
> > >> > end up constantly reverting the previous revert.
> > >> >
> > >> > The window is very small - the page in question will have had to have
> > >> > instructions read from it immediately prior to the handler being entered,
> > >> > and would have had to be made "old" before subsequently being unmapped.
> > >> 
> > >> > Rather than excessively complicating the code and making it even more
> > >> > inefficient (as in your patch), we could instead retry executing the
> > >> > instruction when we discover that the page is unavailable, which should
> > >> > cause the page to be paged back in.
> > >> 
> > >> My patch does not introduce any inefficiencies.  It only moves the
> > >> check for user_mode up a bit.  My patch did duplicate the code.
> > >> 
> > >> > If the page really is unavailable, the prefetch abort should cause a
> > >> > SEGV to be raised, otherwise the re-execution should replace the page.
> > >> >
> > >> > The danger to that approach is we page it back in, and it gets paged
> > >> > back out before we're able to read the instruction indefinitely.
> > >> 
> > >> I would think either a little code duplication or a function that looks
> > >> at user_mode(regs) and picks the appropriate kind of copy to do would be
> > >> the best way to go.  Because what needs to happen in the two cases for
> > >> reading the instruction are almost completely different.
> > >
> > > That is what I mean.  I'd prefer to avoid that with the large chunk of
> > > code.  How about instead adding a local replacement for
> > > probe_kernel_address() that just sorts out the reading, rather than
> > > duplicating all the code to deal with thumb fixup.
> > 
> > So something like this should be fine?
> > 
> > Jing Xiangfeng can you test this please?  I think this fixes your issue
> > but I don't currently have an arm development box where I could test this.
> 
> Sorry, only just got around to this again.  What I came up with is this:

I've heard nothing, so I've done nothing...

> 8<===
> From: Russell King <rmk+kernel@armlinux.org.uk>
> Subject: [PATCH] ARM: mm: fix alignment
> 
> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
> ---
>  arch/arm/mm/alignment.c | 44 ++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 36 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
> index 6067fa4de22b..529f54d94709 100644
> --- a/arch/arm/mm/alignment.c
> +++ b/arch/arm/mm/alignment.c
> @@ -765,6 +765,36 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
>  	return NULL;
>  }
>  
> +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, unsigned long *inst)
> +{
> +	u32 instr = 0;
> +	int fault;
> +
> +	if (user_mode(regs))
> +		fault = get_user(instr, ip);
> +	else
> +		fault = probe_kernel_address(ip, instr);
> +
> +	*inst = __mem_to_opcode_arm(instr);
> +
> +	return fault;
> +}
> +
> +static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst)
> +{
> +	u16 instr = 0;
> +	int fault;
> +
> +	if (user_mode(regs))
> +		fault = get_user(instr, ip);
> +	else
> +		fault = probe_kernel_address(ip, instr);
> +
> +	*inst = __mem_to_opcode_thumb16(instr);
> +
> +	return fault;
> +}
> +
>  static int
>  do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>  {
> @@ -772,10 +802,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>  	unsigned long instr = 0, instrptr;
>  	int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
>  	unsigned int type;
> -	unsigned int fault;
>  	u16 tinstr = 0;
>  	int isize = 4;
>  	int thumb2_32b = 0;
> +	int fault;
>  
>  	if (interrupts_enabled(regs))
>  		local_irq_enable();
> @@ -784,15 +814,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>  
>  	if (thumb_mode(regs)) {
>  		u16 *ptr = (u16 *)(instrptr & ~1);
> -		fault = probe_kernel_address(ptr, tinstr);
> -		tinstr = __mem_to_opcode_thumb16(tinstr);
> +
> +		fault = alignment_get_thumb(regs, ptr, &tinstr);
>  		if (!fault) {
>  			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
>  			    IS_T32(tinstr)) {
>  				/* Thumb-2 32-bit */
> -				u16 tinst2 = 0;
> -				fault = probe_kernel_address(ptr + 1, tinst2);
> -				tinst2 = __mem_to_opcode_thumb16(tinst2);
> +				u16 tinst2;
> +				fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
>  				instr = __opcode_thumb32_compose(tinstr, tinst2);
>  				thumb2_32b = 1;
>  			} else {
> @@ -801,8 +830,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>  			}
>  		}
>  	} else {
> -		fault = probe_kernel_address((void *)instrptr, instr);
> -		instr = __mem_to_opcode_arm(instr);
> +		fault = alignment_get_arm(regs, (void *)instrptr, &instr);
>  	}
>  
>  	if (fault) {
> -- 
> 2.7.4
> 
> -- 
> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
> According to speedtest.net: 11.9Mbps down 500kbps up
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
Eric W. Biederman Sept. 16, 2019, 2:31 p.m. UTC | #14
Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:

> On Fri, Sep 06, 2019 at 04:17:59PM +0100, Russell King - ARM Linux admin wrote:
>> On Mon, Sep 02, 2019 at 12:36:56PM -0500, Eric W. Biederman wrote:
>> > Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
>> > 
>> > > On Fri, Aug 30, 2019 at 04:02:48PM -0500, Eric W. Biederman wrote:
>> > >> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
>> > >> 
>> > >> > On Fri, Aug 30, 2019 at 02:45:36PM -0500, Eric W. Biederman wrote:
>> > >> >> Russell King - ARM Linux admin <linux@armlinux.org.uk> writes:
>> > >> >> 
>> > >> >> > On Fri, Aug 30, 2019 at 09:31:17PM +0800, Jing Xiangfeng wrote:
>> > >> >> >> The function do_alignment can handle misaligned address for user and
>> > >> >> >> kernel space. If it is a userspace access, do_alignment may fail on
>> > >> >> >> a low-memory situation, because page faults are disabled in
>> > >> >> >> probe_kernel_address.
>> > >> >> >> 
>> > >> >> >> Fix this by using __copy_from_user instead of probe_kernel_address.
>> > >> >> >> 
>> > >> >> >> Fixes: b255188 ("ARM: fix scheduling while atomic warning in alignment handling code")
>> > >> >> >> Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
>> > >> >> >
>> > >> >> > NAK.
>> > >> >> >
>> > >> >> > The "scheduling while atomic warning in alignment handling code" is
>> > >> >> > caused by fixing up the page fault while trying to handle the
>> > >> >> > mis-alignment fault generated from an instruction in atomic context.
>> > >> >> >
>> > >> >> > Your patch re-introduces that bug.
>> > >> >> 
>> > >> >> And the patch that fixed scheduling while atomic apparently introduced a
>> > >> >> regression.  Admittedly a regression that took 6 years to track down but
>> > >> >> still.
>> > >> >
>> > >> > Right, and given the number of years, we are trading one regression for
>> > >> > a different regression.  If we revert to the original code where we
>> > >> > fix up, we will end up with people complaining about a "new" regression
>> > >> > caused by reverting the previous fix.  Follow this policy and we just
>> > >> > end up constantly reverting the previous revert.
>> > >> >
>> > >> > The window is very small - the page in question will have had to have
>> > >> > instructions read from it immediately prior to the handler being entered,
>> > >> > and would have had to be made "old" before subsequently being unmapped.
>> > >> 
>> > >> > Rather than excessively complicating the code and making it even more
>> > >> > inefficient (as in your patch), we could instead retry executing the
>> > >> > instruction when we discover that the page is unavailable, which should
>> > >> > cause the page to be paged back in.
>> > >> 
>> > >> My patch does not introduce any inefficiencies.  It only moves the
>> > >> check for user_mode up a bit.  My patch did duplicate the code.
>> > >> 
>> > >> > If the page really is unavailable, the prefetch abort should cause a
>> > >> > SEGV to be raised, otherwise the re-execution should replace the page.
>> > >> >
>> > >> > The danger to that approach is we page it back in, and it gets paged
>> > >> > back out before we're able to read the instruction indefinitely.
>> > >> 
>> > >> I would think either a little code duplication or a function that looks
>> > >> at user_mode(regs) and picks the appropriate kind of copy to do would be
>> > >> the best way to go.  Because what needs to happen in the two cases for
>> > >> reading the instruction are almost completely different.
>> > >
>> > > That is what I mean.  I'd prefer to avoid that with the large chunk of
>> > > code.  How about instead adding a local replacement for
>> > > probe_kernel_address() that just sorts out the reading, rather than
>> > > duplicating all the code to deal with thumb fixup.
>> > 
>> > So something like this should be fine?
>> > 
>> > Jing Xiangfeng can you test this please?  I think this fixes your issue
>> > but I don't currently have an arm development box where I could test this.
>> 
>> Sorry, only just got around to this again.  What I came up with is this:
>
> I've heard nothing, so I've done nothing...

Sorry it wasn't clear you were looking for feedback.

This looks functionally equivalent to the last test version I posted and
that Jing Xiangfeng confirms solves his issue.

So I say please merge whichever version you like.

Eric

>> 8<===
>> From: Russell King <rmk+kernel@armlinux.org.uk>
>> Subject: [PATCH] ARM: mm: fix alignment
>> 
>> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
>> ---
>>  arch/arm/mm/alignment.c | 44 ++++++++++++++++++++++++++++++++++++--------
>>  1 file changed, 36 insertions(+), 8 deletions(-)
>> 
>> diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
>> index 6067fa4de22b..529f54d94709 100644
>> --- a/arch/arm/mm/alignment.c
>> +++ b/arch/arm/mm/alignment.c
>> @@ -765,6 +765,36 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
>>  	return NULL;
>>  }
>>  
>> +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, unsigned long *inst)
>> +{
>> +	u32 instr = 0;
>> +	int fault;
>> +
>> +	if (user_mode(regs))
>> +		fault = get_user(instr, ip);
>> +	else
>> +		fault = probe_kernel_address(ip, instr);
>> +
>> +	*inst = __mem_to_opcode_arm(instr);
>> +
>> +	return fault;
>> +}
>> +
>> +static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst)
>> +{
>> +	u16 instr = 0;
>> +	int fault;
>> +
>> +	if (user_mode(regs))
>> +		fault = get_user(instr, ip);
>> +	else
>> +		fault = probe_kernel_address(ip, instr);
>> +
>> +	*inst = __mem_to_opcode_thumb16(instr);
>> +
>> +	return fault;
>> +}
>> +
>>  static int
>>  do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>>  {
>> @@ -772,10 +802,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>>  	unsigned long instr = 0, instrptr;
>>  	int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
>>  	unsigned int type;
>> -	unsigned int fault;
>>  	u16 tinstr = 0;
>>  	int isize = 4;
>>  	int thumb2_32b = 0;
>> +	int fault;
>>  
>>  	if (interrupts_enabled(regs))
>>  		local_irq_enable();
>> @@ -784,15 +814,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>>  
>>  	if (thumb_mode(regs)) {
>>  		u16 *ptr = (u16 *)(instrptr & ~1);
>> -		fault = probe_kernel_address(ptr, tinstr);
>> -		tinstr = __mem_to_opcode_thumb16(tinstr);
>> +
>> +		fault = alignment_get_thumb(regs, ptr, &tinstr);
>>  		if (!fault) {
>>  			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
>>  			    IS_T32(tinstr)) {
>>  				/* Thumb-2 32-bit */
>> -				u16 tinst2 = 0;
>> -				fault = probe_kernel_address(ptr + 1, tinst2);
>> -				tinst2 = __mem_to_opcode_thumb16(tinst2);
>> +				u16 tinst2;
>> +				fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
>>  				instr = __opcode_thumb32_compose(tinstr, tinst2);
>>  				thumb2_32b = 1;
>>  			} else {
>> @@ -801,8 +830,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
>>  			}
>>  		}
>>  	} else {
>> -		fault = probe_kernel_address((void *)instrptr, instr);
>> -		instr = __mem_to_opcode_arm(instr);
>> +		fault = alignment_get_arm(regs, (void *)instrptr, &instr);
>>  	}
>>  
>>  	if (fault) {
>> -- 
>> 2.7.4
>> 
>> -- 
>> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
>> FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
>> According to speedtest.net: 11.9Mbps down 500kbps up
>> 
>> _______________________________________________
>> linux-arm-kernel mailing list
>> linux-arm-kernel@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>>

Patch

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 04b3643..2ccabd3 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -774,6 +774,7 @@  static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
 	unsigned long instr = 0, instrptr;
 	int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
 	unsigned int type;
+	mm_segment_t fs;
 	unsigned int fault;
 	u16 tinstr = 0;
 	int isize = 4;
@@ -784,16 +785,22 @@  static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
 
 	instrptr = instruction_pointer(regs);
 
+	fs = get_fs();
+	set_fs(KERNEL_DS);
 	if (thumb_mode(regs)) {
 		u16 *ptr = (u16 *)(instrptr & ~1);
-		fault = probe_kernel_address(ptr, tinstr);
+		fault = __copy_from_user(&tinstr,
+				(__force const void __user *)ptr,
+				sizeof(tinstr));
 		tinstr = __mem_to_opcode_thumb16(tinstr);
 		if (!fault) {
 			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
 			    IS_T32(tinstr)) {
 				/* Thumb-2 32-bit */
 				u16 tinst2 = 0;
-				fault = probe_kernel_address(ptr + 1, tinst2);
+				fault = __copy_from_user(&tinst2,
+						(__force const void __user *)(ptr+1),
+						sizeof(tinst2));
 				tinst2 = __mem_to_opcode_thumb16(tinst2);
 				instr = __opcode_thumb32_compose(tinstr, tinst2);
 				thumb2_32b = 1;
@@ -803,10 +810,13 @@  static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
 			}
 		}
 	} else {
-		fault = probe_kernel_address((void *)instrptr, instr);
+		fault = __copy_from_user(&instr,
+				(__force const void __user *)instrptr,
+				sizeof(instr));
 		instr = __mem_to_opcode_arm(instr);
 	}
 
+	set_fs(fs);
 	if (fault) {
 		type = TYPE_FAULT;
 		goto bad_or_fault;