Message ID | B8AC3E80E903784988AB3003E3E97330C005B9BF@dggemm510-mbx.china.huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 16 October 2017 at 12:42, Liuwenliang (Lamb) <liuwenliang@huawei.com> wrote: > On 10/16/2017 07:03 PM, Abbott Liu wrote: >>arch/arm/kernel/entry-armv.S:348: Error: selected processor does not support `movw r1, > #:lower16:((((0xC0000000-0x01000000)>>3)+((0xC0000000-0x01000000)-(1<<29))))' in ARM mode >>arch/arm/kernel/entry-armv.S:348: Error: selected processor does not support `movt r1, > #:upper16:((((0xC0000000-0x01000000)>>3)+((0xC0000000-0x01000000)-(1<<29))))' in ARM mode > > Thanks for building test. This error can be solved by following code: > --- a/arch/arm/kernel/entry-armv.S > +++ b/arch/arm/kernel/entry-armv.S > @@ -188,8 +188,7 @@ ENDPROC(__und_invalid) > get_thread_info tsk > ldr r0, [tsk, #TI_ADDR_LIMIT] > #ifdef CONFIG_KASAN > - movw r1, #:lower16:TASK_SIZE > - movt r1, #:upper16:TASK_SIZE > + ldr r1, =TASK_SIZE > #else > mov r1, #TASK_SIZE > #endif This is unnecessary: ldr r1, =TASK_SIZE will be converted to a mov instruction by the assembler if the value of TASK_SIZE fits its 12-bit immediate field. So please remove the whole #ifdef, and just use ldr r1, =xxx > @@ -446,7 +445,12 @@ ENDPROC(__fiq_abt) > @ if it was interrupted in a critical region. Here we > @ perform a quick test inline since it should be false > @ 99.9999% of the time. The rest is done out of line. > +#if CONFIG_KASAN > + ldr r0, =TASK_SIZE > + cmp r4, r0 > +#else > cmp r4, #TASK_SIZE > +#endif > blhs kuser_cmpxchg64_fixup > #endif > #endif > > movt,movw can only be used in ARMv6*, ARMv7 instruction set. But ldr can be used in ARMv4*, ARMv5T*, ARMv6*, ARMv7. > Maybe the performance is going to fall down by using ldr, but I think the influence of performance is very limited. >
On 10/17/2017 12:40 AM, Abbott Liu wrote: > Ard Biesheuvel [ard.biesheuvel@linaro.org] wrote >This is unnecessary: > >ldr r1, =TASK_SIZE > >will be converted to a mov instruction by the assembler if the value of TASK_SIZE fits its 12-bit immediate field. > >So please remove the whole #ifdef, and just use ldr r1, =xxx Thanks for your review. The assembler on my computer don't convert ldr r1,=xxx into mov instruction. Here is the objdump for vmlinux: c0a3b100 <__irq_svc>: c0a3b100: e24dd04c sub sp, sp, #76 ; 0x4c c0a3b104: e31d0004 tst sp, #4 c0a3b108: 024dd004 subeq sp, sp, #4 c0a3b10c: e88d1ffe stm sp, {r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip} c0a3b110: e8900038 ldm r0, {r3, r4, r5} c0a3b114: e28d7030 add r7, sp, #48 ; 0x30 c0a3b118: e3e06000 mvn r6, #0 c0a3b11c: e28d204c add r2, sp, #76 ; 0x4c c0a3b120: 02822004 addeq r2, r2, #4 c0a3b124: e52d3004 push {r3} ; (str r3, [sp, #-4]!) c0a3b128: e1a0300e mov r3, lr c0a3b12c: e887007c stm r7, {r2, r3, r4, r5, r6} c0a3b130: e1a0972d lsr r9, sp, #14 c0a3b134: e1a09709 lsl r9, r9, #14 c0a3b138: e5990008 ldr r0, [r9, #8] ---c0a3b13c: e59f1054 ldr r1, [pc, #84] ; c0a3b198 <__irq_svc+0x98> //ldr r1, =TASK_SIZE c0a3b140: e5891008 str r1, [r9, #8] c0a3b144: e58d004c str r0, [sp, #76] ; 0x4c c0a3b148: ee130f10 mrc 15, 0, r0, cr3, cr0, {0} c0a3b14c: e58d0048 str r0, [sp, #72] ; 0x48 c0a3b150: e3a00051 mov r0, #81 ; 0x51 c0a3b154: ee030f10 mcr 15, 0, r0, cr3, cr0, {0} ---c0a3b158: e59f103c ldr r1, [pc, #60] ; c0a3b19c <__irq_svc+0x9c> //orginal irq_svc also used same instruction c0a3b15c: e1a0000d mov r0, sp c0a3b160: e28fe000 add lr, pc, #0 c0a3b164: e591f000 ldr pc, [r1] c0a3b168: e5998004 ldr r8, [r9, #4] c0a3b16c: e5990000 ldr r0, [r9] c0a3b170: e3380000 teq r8, #0 c0a3b174: 13a00000 movne r0, #0 c0a3b178: e3100002 tst r0, #2 c0a3b17c: 1b000007 blne c0a3b1a0 <svc_preempt> c0a3b180: e59d104c ldr r1, [sp, #76] ; 0x4c c0a3b184: e59d0048 ldr r0, [sp, #72] ; 0x48 c0a3b188: ee030f10 mcr 15, 0, r0, cr3, cr0, {0} c0a3b18c: e5891008 str r1, [r9, #8] c0a3b190: e16ff005 msr SPSR_fsxc, r5 c0a3b194: e8ddffff ldm sp, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, sp, lr, pc}^ ---c0a3b198: b6e00000 .word 0xb6e00000 //TASK_SIZE:0xb6e00000 c0a3b19c: c0ccccf0 .word 0xc0ccccf0 Even "ldr r1, =TASK_SIZE" won't be converted to a mov instruction by some assembler, I also think it is better to remove the whole #ifdef because the influence of performance by ldr is very limited.
On 17 October 2017 at 12:27, Liuwenliang (Lamb) <liuwenliang@huawei.com> wrote: > On 10/17/2017 12:40 AM, Abbott Liu wrote: >> Ard Biesheuvel [ard.biesheuvel@linaro.org] wrote >>This is unnecessary: >> >>ldr r1, =TASK_SIZE >> >>will be converted to a mov instruction by the assembler if the value of TASK_SIZE fits its 12-bit immediate field. >> >>So please remove the whole #ifdef, and just use ldr r1, =xxx > > Thanks for your review. > > The assembler on my computer don't convert ldr r1,=xxx into mov instruction. What I said was 'if the value of TASK_SIZE fits its 12-bit immediate field' and your value of TASK_SIZE is 0xb6e00000, which cannot be decomposed in the right way. If you build with KASAN disabled, it will generate a mov instruction instead. > Here is the objdump for vmlinux: > > c0a3b100 <__irq_svc>: > c0a3b100: e24dd04c sub sp, sp, #76 ; 0x4c > c0a3b104: e31d0004 tst sp, #4 > c0a3b108: 024dd004 subeq sp, sp, #4 > c0a3b10c: e88d1ffe stm sp, {r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip} > c0a3b110: e8900038 ldm r0, {r3, r4, r5} > c0a3b114: e28d7030 add r7, sp, #48 ; 0x30 > c0a3b118: e3e06000 mvn r6, #0 > c0a3b11c: e28d204c add r2, sp, #76 ; 0x4c > c0a3b120: 02822004 addeq r2, r2, #4 > c0a3b124: e52d3004 push {r3} ; (str r3, [sp, #-4]!) > c0a3b128: e1a0300e mov r3, lr > c0a3b12c: e887007c stm r7, {r2, r3, r4, r5, r6} > c0a3b130: e1a0972d lsr r9, sp, #14 > c0a3b134: e1a09709 lsl r9, r9, #14 > c0a3b138: e5990008 ldr r0, [r9, #8] > ---c0a3b13c: e59f1054 ldr r1, [pc, #84] ; c0a3b198 <__irq_svc+0x98> //ldr r1, =TASK_SIZE > c0a3b140: e5891008 str r1, [r9, #8] > c0a3b144: e58d004c str r0, [sp, #76] ; 0x4c > c0a3b148: ee130f10 mrc 15, 0, r0, cr3, cr0, {0} > c0a3b14c: e58d0048 str r0, [sp, #72] ; 0x48 > c0a3b150: e3a00051 mov r0, #81 ; 0x51 > c0a3b154: ee030f10 mcr 15, 0, r0, cr3, cr0, {0} > ---c0a3b158: e59f103c ldr r1, [pc, #60] ; c0a3b19c <__irq_svc+0x9c> //orginal irq_svc also used same instruction > c0a3b15c: e1a0000d mov r0, sp > c0a3b160: e28fe000 add lr, pc, #0 > c0a3b164: e591f000 ldr pc, [r1] > c0a3b168: e5998004 ldr r8, [r9, #4] > c0a3b16c: e5990000 ldr r0, [r9] > c0a3b170: e3380000 teq r8, #0 > c0a3b174: 13a00000 movne r0, #0 > c0a3b178: e3100002 tst r0, #2 > c0a3b17c: 1b000007 blne c0a3b1a0 <svc_preempt> > c0a3b180: e59d104c ldr r1, [sp, #76] ; 0x4c > c0a3b184: e59d0048 ldr r0, [sp, #72] ; 0x48 > c0a3b188: ee030f10 mcr 15, 0, r0, cr3, cr0, {0} > c0a3b18c: e5891008 str r1, [r9, #8] > c0a3b190: e16ff005 msr SPSR_fsxc, r5 > c0a3b194: e8ddffff ldm sp, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, sp, lr, pc}^ > ---c0a3b198: b6e00000 .word 0xb6e00000 //TASK_SIZE:0xb6e00000 > c0a3b19c: c0ccccf0 .word 0xc0ccccf0 > > > > Even "ldr r1, =TASK_SIZE" won't be converted to a mov instruction by some assembler, I also think it is better > to remove the whole #ifdef because the influence of performance by ldr is very limited. >
On 10/17/2017 8:45 PM, Abbott Liu wrote: >What I said was > >'if the value of TASK_SIZE fits its 12-bit immediate field' > >and your value of TASK_SIZE is 0xb6e00000, which cannot be decomposed in the right way. > >If you build with KASAN disabled, it will generate a mov instruction instead. Thanks for your explain. I understand now. I has tested and the testing result proves that what you said is right. Here is test log: c010e9e0 <__irq_svc>: c010e9e0: e24dd04c sub sp, sp, #76 ; 0x4c c010e9e4: e31d0004 tst sp, #4 c010e9e8: 024dd004 subeq sp, sp, #4 c010e9ec: e88d1ffe stm sp, {r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip} c010e9f0: e8900038 ldm r0, {r3, r4, r5} c010e9f4: e28d7030 add r7, sp, #48 ; 0x30 c010e9f8: e3e06000 mvn r6, #0 c010e9fc: e28d204c add r2, sp, #76 ; 0x4c c010ea00: 02822004 addeq r2, r2, #4 c010ea04: e52d3004 push {r3} ; (str r3, [sp, #-4]!) c010ea08: e1a0300e mov r3, lr c010ea0c: e887007c stm r7, {r2, r3, r4, r5, r6} c010ea10: e1a0972d lsr r9, sp, #14 c010ea14: e1a09709 lsl r9, r9, #14 c010ea18: e5990008 ldr r0, [r9, #8] c010ea1c: e3a014bf mov r1, #-1090519040 ; 0xbf000000 // ldr r1,=0xbf000000
On Mon, Oct 16, 2017 at 11:42:05AM +0000, Liuwenliang (Lamb) wrote: > On 10/16/2017 07:03 PM, Abbott Liu wrote: > >arch/arm/kernel/entry-armv.S:348: Error: selected processor does not support `movw r1, > #:lower16:((((0xC0000000-0x01000000)>>3)+((0xC0000000-0x01000000)-(1<<29))))' in ARM mode > >arch/arm/kernel/entry-armv.S:348: Error: selected processor does not support `movt r1, > #:upper16:((((0xC0000000-0x01000000)>>3)+((0xC0000000-0x01000000)-(1<<29))))' in ARM mode > > Thanks for building test. This error can be solved by following code: > --- a/arch/arm/kernel/entry-armv.S > +++ b/arch/arm/kernel/entry-armv.S > @@ -188,8 +188,7 @@ ENDPROC(__und_invalid) > get_thread_info tsk > ldr r0, [tsk, #TI_ADDR_LIMIT] > #ifdef CONFIG_KASAN > - movw r1, #:lower16:TASK_SIZE > - movt r1, #:upper16:TASK_SIZE > + ldr r1, =TASK_SIZE > #else > mov r1, #TASK_SIZE > #endif We can surely do better than this with macros and condition support - we can build-time test in the assembler whether TASK_SIZE can fit in a normal "mov", whether we can use the movw/movt instructions, or fall back to ldr if necessary. I'd rather we avoided "ldr" here where possible. > @@ -446,7 +445,12 @@ ENDPROC(__fiq_abt) > @ if it was interrupted in a critical region. Here we > @ perform a quick test inline since it should be false > @ 99.9999% of the time. The rest is done out of line. > +#if CONFIG_KASAN > + ldr r0, =TASK_SIZE > + cmp r4, r0 > +#else > cmp r4, #TASK_SIZE Same sort of thing goes for here - we can select the instruction at runtime using the assembler's macros and condition support. We know that TASK_SIZE is going to be one of a limited set of values.
On Mon, Oct 16, 2017 at 01:14:54PM +0100, Ard Biesheuvel wrote: > On 16 October 2017 at 12:42, Liuwenliang (Lamb) <liuwenliang@huawei.com> wrote: > > On 10/16/2017 07:03 PM, Abbott Liu wrote: > >>arch/arm/kernel/entry-armv.S:348: Error: selected processor does not support `movw r1, > > #:lower16:((((0xC0000000-0x01000000)>>3)+((0xC0000000-0x01000000)-(1<<29))))' in ARM mode > >>arch/arm/kernel/entry-armv.S:348: Error: selected processor does not support `movt r1, > > #:upper16:((((0xC0000000-0x01000000)>>3)+((0xC0000000-0x01000000)-(1<<29))))' in ARM mode > > > > Thanks for building test. This error can be solved by following code: > > --- a/arch/arm/kernel/entry-armv.S > > +++ b/arch/arm/kernel/entry-armv.S > > @@ -188,8 +188,7 @@ ENDPROC(__und_invalid) > > get_thread_info tsk > > ldr r0, [tsk, #TI_ADDR_LIMIT] > > #ifdef CONFIG_KASAN > > - movw r1, #:lower16:TASK_SIZE > > - movt r1, #:upper16:TASK_SIZE > > + ldr r1, =TASK_SIZE > > #else > > mov r1, #TASK_SIZE > > #endif > > This is unnecessary: > > ldr r1, =TASK_SIZE > > will be converted to a mov instruction by the assembler if the value > of TASK_SIZE fits its 12-bit immediate field. It's an 8-bit immediate field for ARM. What it won't do is expand it to a pair of movw/movt instructions if it doesn't fit.
On Tue, Oct 17, 2017 at 11:27:19AM +0000, Liuwenliang (Lamb) wrote:
> ---c0a3b198: b6e00000 .word 0xb6e00000 //TASK_SIZE:0xb6e00000
It's probably going to be better all round to round TASK_SIZE down
to something that fits in an 8-bit rotated constant anyway (like
we already guarantee) which would mean this patch is not necessary.
--- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -188,8 +188,7 @@ ENDPROC(__und_invalid) get_thread_info tsk ldr r0, [tsk, #TI_ADDR_LIMIT] #ifdef CONFIG_KASAN - movw r1, #:lower16:TASK_SIZE - movt r1, #:upper16:TASK_SIZE + ldr r1, =TASK_SIZE #else mov r1, #TASK_SIZE #endif @@ -446,7 +445,12 @@ ENDPROC(__fiq_abt) @ if it was interrupted in a critical region. Here we @ perform a quick test inline since it should be false @ 99.9999% of the time. The rest is done out of line. +#if CONFIG_KASAN + ldr r0, =TASK_SIZE + cmp r4, r0 +#else cmp r4, #TASK_SIZE +#endif blhs kuser_cmpxchg64_fixup #endif #endif