Message ID | 1383852042-10780-1-git-send-email-sboyd@codeaurora.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Nov 7, 2013 at 1:20 PM, Stephen Boyd <sboyd@codeaurora.org> wrote: > If we're running on a v7 ARM CPU, detect if the CPU supports the > sdiv/udiv instructions and replace the signed and unsigned > division library functions with an sdiv/udiv instruction. [snip] > diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile > index bd454b0..6ed6496 100644 > --- a/arch/arm/lib/Makefile > +++ b/arch/arm/lib/Makefile > @@ -15,6 +15,9 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ > io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ > call_with_stack.o > > +lib-$(CONFIG_CPU_V7) += div-v7.o > +CFLAGS_div-v7.o := -march=armv7-a Won't this fail to build if the compiler doesn't have armv7-a support. Perhaps we don't care about compilers that old. Rob
On 11:20 Thu 07 Nov , Stephen Boyd wrote: > If we're running on a v7 ARM CPU, detect if the CPU supports the > sdiv/udiv instructions and replace the signed and unsigned > division library functions with an sdiv/udiv instruction. > > Running the perf messaging benchmark in pipe mode > > $ perf bench sched messaging -p > > shows a modest improvement on my v7 CPU. > > before: > (5.060 + 5.960 + 5.971 + 5.643 + 6.029 + 5.665 + 6.050 + 5.870 + 6.117 + 5.683) / 10 = 5.805 > > after: > (4.884 + 5.549 + 5.749 + 6.001 + 5.460 + 5.103 + 5.956 + 6.112 + 5.468 + 5.093) / 10 = 5.538 > > (5.805 - 5.538) / 5.805 = 4.6% > > Signed-off-by: Stephen Boyd <sboyd@codeaurora.org> > --- > > Should we add in the __div0() call if the denominator is 0? > > arch/arm/kernel/setup.c | 10 +++++++++ > arch/arm/lib/Makefile | 3 +++ > arch/arm/lib/div-v7.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ > arch/arm/lib/lib1funcs.S | 16 +++++++++++++ > 4 files changed, 87 insertions(+) > create mode 100644 arch/arm/lib/div-v7.c > > diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c > index 0e1e2b3..7d519f4 100644 > --- a/arch/arm/kernel/setup.c > +++ b/arch/arm/kernel/setup.c > @@ -30,6 +30,7 @@ > #include <linux/bug.h> > #include <linux/compiler.h> > #include <linux/sort.h> > +#include <linux/static_key.h> > > #include <asm/unified.h> > #include <asm/cp15.h> > @@ -365,6 +366,8 @@ void __init early_print(const char *str, ...) > printk("%s", buf); > } > > +struct static_key cpu_has_idiv = STATIC_KEY_INIT_FALSE; > + > static void __init cpuid_init_hwcaps(void) > { > unsigned int divide_instrs, vmsa; > @@ -381,6 +384,13 @@ static void __init cpuid_init_hwcaps(void) > elf_hwcap |= HWCAP_IDIVT; > } > > +#ifdef CONFIG_THUMB2_KERNEL if (IS_ENABLED(CONFIG_THUMB2_KERNEL) && elf_hwcap & HWCAP_IDIVT) > + if (elf_hwcap & HWCAP_IDIVT) > +#else > + if (elf_hwcap & HWCAP_IDIVA) > +#endif > + static_key_slow_inc(&cpu_has_idiv); > + > /* LPAE implies atomic ldrd/strd instructions */ > vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; > if (vmsa >= 5) > diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile > index bd454b0..6ed6496 100644 > --- a/arch/arm/lib/Makefile > +++ b/arch/arm/lib/Makefile > @@ -15,6 +15,9 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ > io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ > call_with_stack.o > > +lib-$(CONFIG_CPU_V7) += div-v7.o > +CFLAGS_div-v7.o := -march=armv7-a > + > mmu-y := clear_user.o copy_page.o getuser.o putuser.o > > # the code in uaccess.S is not preemption safe and > diff --git a/arch/arm/lib/div-v7.c b/arch/arm/lib/div-v7.c > new file mode 100644 > index 0000000..96ceb92 > --- /dev/null > +++ b/arch/arm/lib/div-v7.c > @@ -0,0 +1,58 @@ > +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 and > + * only version 2 as published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + */ > + > +#include <linux/static_key.h> > + > +extern int ___aeabi_idiv(int, int); > +extern unsigned ___aeabi_uidiv(int, int); > + > +extern struct static_key cpu_has_idiv; > + > +int __aeabi_idiv(int numerator, int denominator) > +{ > + if (static_key_false(&cpu_has_idiv)) { > + int ret; > + > + asm volatile ( > + ".arch_extension idiv\n" > + "sdiv %0, %1, %2" > + : "=&r" (ret) > + : "r" (numerator), "r" (denominator)); > + > + return ret; > + } > + > + return ___aeabi_idiv(numerator, denominator); > +} > + > +int __divsi3(int numerator, int denominator) > + __attribute__((alias("__aeabi_idiv"))); > + > +unsigned __aeabi_uidiv(int numerator, int denominator) > +{ > + if (static_key_false(&cpu_has_idiv)) { > + int ret; > + > + asm volatile ( > + ".arch_extension idiv\n" > + "udiv %0, %1, %2" > + : "=&r" (ret) > + : "r" (numerator), "r" (denominator)); > + > + return ret; > + } > + > + return ___aeabi_uidiv(numerator, denominator); > +} > + > +unsigned __udivsi3(int numerator, int denominator) > + __attribute__((alias("__aeabi_uidiv"))); > diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S > index c562f64..adea088 100644 > --- a/arch/arm/lib/lib1funcs.S > +++ b/arch/arm/lib/lib1funcs.S > @@ -205,8 +205,12 @@ Boston, MA 02111-1307, USA. */ > .endm > > > +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7) > ENTRY(__udivsi3) > ENTRY(__aeabi_uidiv) > +#else > +ENTRY(___aeabi_uidiv) > +#endif > UNWIND(.fnstart) > > subs r2, r1, #1 > @@ -232,8 +236,12 @@ UNWIND(.fnstart) > mov pc, lr > > UNWIND(.fnend) > +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7) > ENDPROC(__udivsi3) > ENDPROC(__aeabi_uidiv) > +#else > +ENDPROC(___aeabi_uidiv) > +#endif > > ENTRY(__umodsi3) > UNWIND(.fnstart) > @@ -253,8 +261,12 @@ UNWIND(.fnstart) > UNWIND(.fnend) > ENDPROC(__umodsi3) > > +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7) > ENTRY(__divsi3) > ENTRY(__aeabi_idiv) > +#else > +ENTRY(___aeabi_idiv) > +#endif > UNWIND(.fnstart) > > cmp r1, #0 > @@ -293,8 +305,12 @@ UNWIND(.fnstart) > mov pc, lr > > UNWIND(.fnend) > +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7) > ENDPROC(__divsi3) > ENDPROC(__aeabi_idiv) > +#else > +ENDPROC(___aeabi_idiv) > +#endif > > ENTRY(__modsi3) > UNWIND(.fnstart) > -- > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, > hosted by The Linux Foundation > > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On 19:34 Thu 07 Nov , Rob Herring wrote: > On Thu, Nov 7, 2013 at 1:20 PM, Stephen Boyd <sboyd@codeaurora.org> wrote: > > If we're running on a v7 ARM CPU, detect if the CPU supports the > > sdiv/udiv instructions and replace the signed and unsigned > > division library functions with an sdiv/udiv instruction. > > [snip] > > > diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile > > index bd454b0..6ed6496 100644 > > --- a/arch/arm/lib/Makefile > > +++ b/arch/arm/lib/Makefile > > @@ -15,6 +15,9 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ > > io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ > > call_with_stack.o > > > > +lib-$(CONFIG_CPU_V7) += div-v7.o > > +CFLAGS_div-v7.o := -march=armv7-a > > Won't this fail to build if the compiler doesn't have armv7-a support. > Perhaps we don't care about compilers that old. use the propoer compiler to compile a armv7 kernel Best Regards, J. > > Rob > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Hi Stephen, On 11/07/2013 02:20 PM, Stephen Boyd wrote: > If we're running on a v7 ARM CPU, detect if the CPU supports the > sdiv/udiv instructions and replace the signed and unsigned > division library functions with an sdiv/udiv instruction. [...] > +++ b/arch/arm/lib/div-v7.c > @@ -0,0 +1,58 @@ > +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 and > + * only version 2 as published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + */ > + > +#include <linux/static_key.h> > + > +extern int ___aeabi_idiv(int, int); > +extern unsigned ___aeabi_uidiv(int, int); Why are the input parameters signed? > +extern struct static_key cpu_has_idiv; > + > +int __aeabi_idiv(int numerator, int denominator) > +{ > + if (static_key_false(&cpu_has_idiv)) { > + int ret; > + > + asm volatile ( > + ".arch_extension idiv\n" > + "sdiv %0, %1, %2" > + : "=&r" (ret) > + : "r" (numerator), "r" (denominator)); > + > + return ret; > + } > + > + return ___aeabi_idiv(numerator, denominator); > +} > + > +int __divsi3(int numerator, int denominator) > + __attribute__((alias("__aeabi_idiv"))); > + > +unsigned __aeabi_uidiv(int numerator, int denominator) Unsigned inputs? > +{ > + if (static_key_false(&cpu_has_idiv)) { > + int ret; > + > + asm volatile ( > + ".arch_extension idiv\n" > + "udiv %0, %1, %2" > + : "=&r" (ret) > + : "r" (numerator), "r" (denominator)); > + > + return ret; > + } > + > + return ___aeabi_uidiv(numerator, denominator); > +} > + > +unsigned __udivsi3(int numerator, int denominator) > + __attribute__((alias("__aeabi_uidiv"))); Unsigned inputs? [...] Thanks, Christopher
On Fri, Nov 08, 2013 at 10:58:42AM +0100, Jean-Christophe PLAGNIOL-VILLARD wrote: > On 11:20 Thu 07 Nov , Stephen Boyd wrote: > > @@ -381,6 +384,13 @@ static void __init cpuid_init_hwcaps(void) > > elf_hwcap |= HWCAP_IDIVT; > > } > > > > +#ifdef CONFIG_THUMB2_KERNEL > if (IS_ENABLED(CONFIG_THUMB2_KERNEL) && elf_hwcap & HWCAP_IDIVT) > > + if (elf_hwcap & HWCAP_IDIVT) > > +#else > > + if (elf_hwcap & HWCAP_IDIVA) > > +#endif Take another look, and you'll see the change that you're suggesting is wrong. Instead, the following may be a more reasonable suggestion as a suitable replacement: if (elf_hwcap & (IS_ENABLED(CONFIG_THUMB2_KERNEL) ? HWCAP_IDIVT : HWCAP_IDIVA))
On Fri, Nov 08, 2013 at 12:50:04PM +0100, Jean-Christophe PLAGNIOL-VILLARD wrote: > On 19:34 Thu 07 Nov , Rob Herring wrote: > > On Thu, Nov 7, 2013 at 1:20 PM, Stephen Boyd <sboyd@codeaurora.org> wrote: > > > If we're running on a v7 ARM CPU, detect if the CPU supports the > > > sdiv/udiv instructions and replace the signed and unsigned > > > division library functions with an sdiv/udiv instruction. > > > > [snip] > > > > > diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile > > > index bd454b0..6ed6496 100644 > > > --- a/arch/arm/lib/Makefile > > > +++ b/arch/arm/lib/Makefile > > > @@ -15,6 +15,9 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ > > > io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ > > > call_with_stack.o > > > > > > +lib-$(CONFIG_CPU_V7) += div-v7.o > > > +CFLAGS_div-v7.o := -march=armv7-a > > > > Won't this fail to build if the compiler doesn't have armv7-a support. > > Perhaps we don't care about compilers that old. > > use the propoer compiler to compile a armv7 kernel It's probably about time to get rid of the conditionals for this in the main arch/arm/Makefile actually - some of those date back some 10 or so years. That's something for the v3.14 merge window.
Stephen Boyd <sboyd@codeaurora.org> writes: > +int __aeabi_idiv(int numerator, int denominator) > +{ > + if (static_key_false(&cpu_has_idiv)) { > + int ret; > + > + asm volatile ( > + ".arch_extension idiv\n" > + "sdiv %0, %1, %2" > + : "=&r" (ret) There is no need for the & in the output constraint. Dropping it allows using one of the source registers as destination which may sometimes be beneficial. > + : "r" (numerator), "r" (denominator)); > + > + return ret; > + } > + > + return ___aeabi_idiv(numerator, denominator); > +}
On 11/08/13 08:54, Russell King - ARM Linux wrote: > On Fri, Nov 08, 2013 at 12:50:04PM +0100, Jean-Christophe PLAGNIOL-VILLARD wrote: >> On 19:34 Thu 07 Nov , Rob Herring wrote: >>> On Thu, Nov 7, 2013 at 1:20 PM, Stephen Boyd <sboyd@codeaurora.org> wrote: >>>> If we're running on a v7 ARM CPU, detect if the CPU supports the >>>> sdiv/udiv instructions and replace the signed and unsigned >>>> division library functions with an sdiv/udiv instruction. >>> [snip] >>> >>>> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile >>>> index bd454b0..6ed6496 100644 >>>> --- a/arch/arm/lib/Makefile >>>> +++ b/arch/arm/lib/Makefile >>>> @@ -15,6 +15,9 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ >>>> io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ >>>> call_with_stack.o >>>> >>>> +lib-$(CONFIG_CPU_V7) += div-v7.o >>>> +CFLAGS_div-v7.o := -march=armv7-a >>> Won't this fail to build if the compiler doesn't have armv7-a support. >>> Perhaps we don't care about compilers that old. >> use the propoer compiler to compile a armv7 kernel > It's probably about time to get rid of the conditionals for this in > the main arch/arm/Makefile actually - some of those date back some > 10 or so years. That's something for the v3.14 merge window. I'll take that as an endorsement for not falling back to -march=armv5t -Wa,-march=armv7-a like is done in arch/arm/Makefile.
On 11/08/13 08:48, Christopher Covington wrote: > Hi Stephen, > > On 11/07/2013 02:20 PM, Stephen Boyd wrote: >> If we're running on a v7 ARM CPU, detect if the CPU supports the >> sdiv/udiv instructions and replace the signed and unsigned >> division library functions with an sdiv/udiv instruction. > [...] > >> +++ b/arch/arm/lib/div-v7.c >> @@ -0,0 +1,58 @@ >> +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License version 2 and >> + * only version 2 as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + */ >> + >> +#include <linux/static_key.h> >> + >> +extern int ___aeabi_idiv(int, int); >> +extern unsigned ___aeabi_uidiv(int, int); > Why are the input parameters signed? Copy pasta. Fixed thanks.
On 11/08/13 08:52, Russell King - ARM Linux wrote: > On Fri, Nov 08, 2013 at 10:58:42AM +0100, Jean-Christophe PLAGNIOL-VILLARD wrote: >> On 11:20 Thu 07 Nov , Stephen Boyd wrote: >>> @@ -381,6 +384,13 @@ static void __init cpuid_init_hwcaps(void) >>> elf_hwcap |= HWCAP_IDIVT; >>> } >>> >>> +#ifdef CONFIG_THUMB2_KERNEL >> if (IS_ENABLED(CONFIG_THUMB2_KERNEL) && elf_hwcap & HWCAP_IDIVT) >>> + if (elf_hwcap & HWCAP_IDIVT) >>> +#else >>> + if (elf_hwcap & HWCAP_IDIVA) >>> +#endif > Take another look, and you'll see the change that you're suggesting is > wrong. Instead, the following may be a more reasonable suggestion as > a suitable replacement: > > if (elf_hwcap & (IS_ENABLED(CONFIG_THUMB2_KERNEL) ? > HWCAP_IDIVT : HWCAP_IDIVA)) I can use IS_ENABLED() but I'd prefer a local variable to make it simpler in the conditional.
On 11/08/13 09:02, Måns Rullgård wrote: > Stephen Boyd <sboyd@codeaurora.org> writes: > >> +int __aeabi_idiv(int numerator, int denominator) >> +{ >> + if (static_key_false(&cpu_has_idiv)) { >> + int ret; >> + >> + asm volatile ( >> + ".arch_extension idiv\n" >> + "sdiv %0, %1, %2" >> + : "=&r" (ret) > There is no need for the & in the output constraint. Dropping it allows > using one of the source registers as destination which may sometimes be > beneficial. Ok. Thanks. That does seem to improve things. before: 00000000 <__aeabi_idiv>: 0: e320f000 nop {0} 4: eafffffe b 0 <___aeabi_idiv> 8: e713f110 sdiv r3, r0, r1 c: e1a00003 mov r0, r3 10: e12fff1e bx lr 00000014 <__aeabi_uidiv>: 14: e320f000 nop {0} 18: eafffffe b 0 <___aeabi_uidiv> 1c: e733f110 udiv r3, r0, r1 20: e1a00003 mov r0, r3 24: e12fff1e bx lr after: 00000000 <__aeabi_idiv>: 0: e320f000 nop {0} 4: eafffffe b 0 <___aeabi_idiv> 8: e710f110 sdiv r0, r0, r1 c: e12fff1e bx lr 00000010 <__aeabi_uidiv>: 10: e320f000 nop {0} 14: eafffffe b 0 <___aeabi_uidiv> 18: e730f110 udiv r0, r0, r1 1c: e12fff1e bx lr
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 0e1e2b3..7d519f4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -30,6 +30,7 @@ #include <linux/bug.h> #include <linux/compiler.h> #include <linux/sort.h> +#include <linux/static_key.h> #include <asm/unified.h> #include <asm/cp15.h> @@ -365,6 +366,8 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +struct static_key cpu_has_idiv = STATIC_KEY_INIT_FALSE; + static void __init cpuid_init_hwcaps(void) { unsigned int divide_instrs, vmsa; @@ -381,6 +384,13 @@ static void __init cpuid_init_hwcaps(void) elf_hwcap |= HWCAP_IDIVT; } +#ifdef CONFIG_THUMB2_KERNEL + if (elf_hwcap & HWCAP_IDIVT) +#else + if (elf_hwcap & HWCAP_IDIVA) +#endif + static_key_slow_inc(&cpu_has_idiv); + /* LPAE implies atomic ldrd/strd instructions */ vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; if (vmsa >= 5) diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index bd454b0..6ed6496 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -15,6 +15,9 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ call_with_stack.o +lib-$(CONFIG_CPU_V7) += div-v7.o +CFLAGS_div-v7.o := -march=armv7-a + mmu-y := clear_user.o copy_page.o getuser.o putuser.o # the code in uaccess.S is not preemption safe and diff --git a/arch/arm/lib/div-v7.c b/arch/arm/lib/div-v7.c new file mode 100644 index 0000000..96ceb92 --- /dev/null +++ b/arch/arm/lib/div-v7.c @@ -0,0 +1,58 @@ +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/static_key.h> + +extern int ___aeabi_idiv(int, int); +extern unsigned ___aeabi_uidiv(int, int); + +extern struct static_key cpu_has_idiv; + +int __aeabi_idiv(int numerator, int denominator) +{ + if (static_key_false(&cpu_has_idiv)) { + int ret; + + asm volatile ( + ".arch_extension idiv\n" + "sdiv %0, %1, %2" + : "=&r" (ret) + : "r" (numerator), "r" (denominator)); + + return ret; + } + + return ___aeabi_idiv(numerator, denominator); +} + +int __divsi3(int numerator, int denominator) + __attribute__((alias("__aeabi_idiv"))); + +unsigned __aeabi_uidiv(int numerator, int denominator) +{ + if (static_key_false(&cpu_has_idiv)) { + int ret; + + asm volatile ( + ".arch_extension idiv\n" + "udiv %0, %1, %2" + : "=&r" (ret) + : "r" (numerator), "r" (denominator)); + + return ret; + } + + return ___aeabi_uidiv(numerator, denominator); +} + +unsigned __udivsi3(int numerator, int denominator) + __attribute__((alias("__aeabi_uidiv"))); diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index c562f64..adea088 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -205,8 +205,12 @@ Boston, MA 02111-1307, USA. */ .endm +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7) ENTRY(__udivsi3) ENTRY(__aeabi_uidiv) +#else +ENTRY(___aeabi_uidiv) +#endif UNWIND(.fnstart) subs r2, r1, #1 @@ -232,8 +236,12 @@ UNWIND(.fnstart) mov pc, lr UNWIND(.fnend) +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7) ENDPROC(__udivsi3) ENDPROC(__aeabi_uidiv) +#else +ENDPROC(___aeabi_uidiv) +#endif ENTRY(__umodsi3) UNWIND(.fnstart) @@ -253,8 +261,12 @@ UNWIND(.fnstart) UNWIND(.fnend) ENDPROC(__umodsi3) +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7) ENTRY(__divsi3) ENTRY(__aeabi_idiv) +#else +ENTRY(___aeabi_idiv) +#endif UNWIND(.fnstart) cmp r1, #0 @@ -293,8 +305,12 @@ UNWIND(.fnstart) mov pc, lr UNWIND(.fnend) +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7) ENDPROC(__divsi3) ENDPROC(__aeabi_idiv) +#else +ENDPROC(___aeabi_idiv) +#endif ENTRY(__modsi3) UNWIND(.fnstart)
If we're running on a v7 ARM CPU, detect if the CPU supports the sdiv/udiv instructions and replace the signed and unsigned division library functions with an sdiv/udiv instruction. Running the perf messaging benchmark in pipe mode $ perf bench sched messaging -p shows a modest improvement on my v7 CPU. before: (5.060 + 5.960 + 5.971 + 5.643 + 6.029 + 5.665 + 6.050 + 5.870 + 6.117 + 5.683) / 10 = 5.805 after: (4.884 + 5.549 + 5.749 + 6.001 + 5.460 + 5.103 + 5.956 + 6.112 + 5.468 + 5.093) / 10 = 5.538 (5.805 - 5.538) / 5.805 = 4.6% Signed-off-by: Stephen Boyd <sboyd@codeaurora.org> --- Should we add in the __div0() call if the denominator is 0? arch/arm/kernel/setup.c | 10 +++++++++ arch/arm/lib/Makefile | 3 +++ arch/arm/lib/div-v7.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/arm/lib/lib1funcs.S | 16 +++++++++++++ 4 files changed, 87 insertions(+) create mode 100644 arch/arm/lib/div-v7.c