diff mbox

ARM: Use udiv/sdiv for __aeabi_{u}idiv library functions

Message ID 1383852042-10780-1-git-send-email-sboyd@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stephen Boyd Nov. 7, 2013, 7:20 p.m. UTC
If we're running on a v7 ARM CPU, detect if the CPU supports the
sdiv/udiv instructions and replace the signed and unsigned
division library functions with an sdiv/udiv instruction.

Running the perf messaging benchmark in pipe mode

 $ perf bench sched messaging -p

shows a modest improvement on my v7 CPU.

before:
(5.060 + 5.960 + 5.971 + 5.643 + 6.029 + 5.665 + 6.050 + 5.870 + 6.117 + 5.683) / 10 = 5.805

after:
(4.884 + 5.549 + 5.749 + 6.001 + 5.460 + 5.103 + 5.956 + 6.112 + 5.468 + 5.093) / 10 = 5.538

(5.805 - 5.538) / 5.805 = 4.6%

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---

Should we add in the __div0() call if the denominator is 0?

 arch/arm/kernel/setup.c  | 10 +++++++++
 arch/arm/lib/Makefile    |  3 +++
 arch/arm/lib/div-v7.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
 arch/arm/lib/lib1funcs.S | 16 +++++++++++++
 4 files changed, 87 insertions(+)
 create mode 100644 arch/arm/lib/div-v7.c

Comments

Rob Herring Nov. 8, 2013, 1:34 a.m. UTC | #1
On Thu, Nov 7, 2013 at 1:20 PM, Stephen Boyd <sboyd@codeaurora.org> wrote:
> If we're running on a v7 ARM CPU, detect if the CPU supports the
> sdiv/udiv instructions and replace the signed and unsigned
> division library functions with an sdiv/udiv instruction.

[snip]

> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> index bd454b0..6ed6496 100644
> --- a/arch/arm/lib/Makefile
> +++ b/arch/arm/lib/Makefile
> @@ -15,6 +15,9 @@ lib-y         := backtrace.o changebit.o csumipv6.o csumpartial.o   \
>                    io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
>                    call_with_stack.o
>
> +lib-$(CONFIG_CPU_V7) += div-v7.o
> +CFLAGS_div-v7.o := -march=armv7-a

Won't this fail to build if the compiler doesn't have armv7-a support?
Perhaps we don't care about compilers that old.

Rob
Jean-Christophe PLAGNIOL-VILLARD Nov. 8, 2013, 9:58 a.m. UTC | #2
On 11:20 Thu 07 Nov     , Stephen Boyd wrote:
> If we're running on a v7 ARM CPU, detect if the CPU supports the
> sdiv/udiv instructions and replace the signed and unsigned
> division library functions with an sdiv/udiv instruction.
> 
> Running the perf messaging benchmark in pipe mode
> 
>  $ perf bench sched messaging -p
> 
> shows a modest improvement on my v7 CPU.
> 
> before:
> (5.060 + 5.960 + 5.971 + 5.643 + 6.029 + 5.665 + 6.050 + 5.870 + 6.117 + 5.683) / 10 = 5.805
> 
> after:
> (4.884 + 5.549 + 5.749 + 6.001 + 5.460 + 5.103 + 5.956 + 6.112 + 5.468 + 5.093) / 10 = 5.538
> 
> (5.805 - 5.538) / 5.805 = 4.6%
> 
> Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
> ---
> 
> Should we add in the __div0() call if the denominator is 0?
> 
>  arch/arm/kernel/setup.c  | 10 +++++++++
>  arch/arm/lib/Makefile    |  3 +++
>  arch/arm/lib/div-v7.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
>  arch/arm/lib/lib1funcs.S | 16 +++++++++++++
>  4 files changed, 87 insertions(+)
>  create mode 100644 arch/arm/lib/div-v7.c
> 
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index 0e1e2b3..7d519f4 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -30,6 +30,7 @@
>  #include <linux/bug.h>
>  #include <linux/compiler.h>
>  #include <linux/sort.h>
> +#include <linux/static_key.h>
>  
>  #include <asm/unified.h>
>  #include <asm/cp15.h>
> @@ -365,6 +366,8 @@ void __init early_print(const char *str, ...)
>  	printk("%s", buf);
>  }
>  
> +struct static_key cpu_has_idiv = STATIC_KEY_INIT_FALSE;
> +
>  static void __init cpuid_init_hwcaps(void)
>  {
>  	unsigned int divide_instrs, vmsa;
> @@ -381,6 +384,13 @@ static void __init cpuid_init_hwcaps(void)
>  		elf_hwcap |= HWCAP_IDIVT;
>  	}
>  
> +#ifdef CONFIG_THUMB2_KERNEL
if (IS_ENABLED(CONFIG_THUMB2_KERNEL) && elf_hwcap & HWCAP_IDIVT)
> +	if (elf_hwcap & HWCAP_IDIVT)
> +#else
> +	if (elf_hwcap & HWCAP_IDIVA)
> +#endif
> +		static_key_slow_inc(&cpu_has_idiv);
> +
>  	/* LPAE implies atomic ldrd/strd instructions */
>  	vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0;
>  	if (vmsa >= 5)
> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> index bd454b0..6ed6496 100644
> --- a/arch/arm/lib/Makefile
> +++ b/arch/arm/lib/Makefile
> @@ -15,6 +15,9 @@ lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
>  		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
>  		   call_with_stack.o
>  
> +lib-$(CONFIG_CPU_V7) += div-v7.o
> +CFLAGS_div-v7.o := -march=armv7-a
> +
>  mmu-y	:= clear_user.o copy_page.o getuser.o putuser.o
>  
>  # the code in uaccess.S is not preemption safe and
> diff --git a/arch/arm/lib/div-v7.c b/arch/arm/lib/div-v7.c
> new file mode 100644
> index 0000000..96ceb92
> --- /dev/null
> +++ b/arch/arm/lib/div-v7.c
> @@ -0,0 +1,58 @@
> +/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/static_key.h>
> +
> +extern int ___aeabi_idiv(int, int);
> +extern unsigned ___aeabi_uidiv(int, int);
> +
> +extern struct static_key cpu_has_idiv;
> +
> +int __aeabi_idiv(int numerator, int denominator)
> +{
> +	if (static_key_false(&cpu_has_idiv)) {
> +		int ret;
> +
> +		asm volatile (
> +		".arch_extension idiv\n"
> +		"sdiv %0, %1, %2"
> +		: "=&r" (ret)
> +		: "r" (numerator), "r" (denominator));
> +
> +		return ret;
> +	}
> +
> +	return ___aeabi_idiv(numerator, denominator);
> +}
> +
> +int __divsi3(int numerator, int denominator)
> +	__attribute__((alias("__aeabi_idiv")));
> +
> +unsigned __aeabi_uidiv(int numerator, int denominator)
> +{
> +	if (static_key_false(&cpu_has_idiv)) {
> +		int ret;
> +
> +		asm volatile (
> +		".arch_extension idiv\n"
> +		"udiv %0, %1, %2"
> +		: "=&r" (ret)
> +		: "r" (numerator), "r" (denominator));
> +
> +		return ret;
> +	}
> +
> +	return ___aeabi_uidiv(numerator, denominator);
> +}
> +
> +unsigned __udivsi3(int numerator, int denominator)
> +	__attribute__((alias("__aeabi_uidiv")));
> diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
> index c562f64..adea088 100644
> --- a/arch/arm/lib/lib1funcs.S
> +++ b/arch/arm/lib/lib1funcs.S
> @@ -205,8 +205,12 @@ Boston, MA 02111-1307, USA.  */
>  .endm
>  
>  
> +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7)
>  ENTRY(__udivsi3)
>  ENTRY(__aeabi_uidiv)
> +#else
> +ENTRY(___aeabi_uidiv)
> +#endif
>  UNWIND(.fnstart)
>  
>  	subs	r2, r1, #1
> @@ -232,8 +236,12 @@ UNWIND(.fnstart)
>  	mov	pc, lr
>  
>  UNWIND(.fnend)
> +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7)
>  ENDPROC(__udivsi3)
>  ENDPROC(__aeabi_uidiv)
> +#else
> +ENDPROC(___aeabi_uidiv)
> +#endif
>  
>  ENTRY(__umodsi3)
>  UNWIND(.fnstart)
> @@ -253,8 +261,12 @@ UNWIND(.fnstart)
>  UNWIND(.fnend)
>  ENDPROC(__umodsi3)
>  
> +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7)
>  ENTRY(__divsi3)
>  ENTRY(__aeabi_idiv)
> +#else
> +ENTRY(___aeabi_idiv)
> +#endif
>  UNWIND(.fnstart)
>  
>  	cmp	r1, #0
> @@ -293,8 +305,12 @@ UNWIND(.fnstart)
>  	mov	pc, lr
>  
>  UNWIND(.fnend)
> +#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7)
>  ENDPROC(__divsi3)
>  ENDPROC(__aeabi_idiv)
> +#else
> +ENDPROC(___aeabi_idiv)
> +#endif
>  
>  ENTRY(__modsi3)
>  UNWIND(.fnstart)
> -- 
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Jean-Christophe PLAGNIOL-VILLARD Nov. 8, 2013, 11:50 a.m. UTC | #3
On 19:34 Thu 07 Nov     , Rob Herring wrote:
> On Thu, Nov 7, 2013 at 1:20 PM, Stephen Boyd <sboyd@codeaurora.org> wrote:
> > If we're running on a v7 ARM CPU, detect if the CPU supports the
> > sdiv/udiv instructions and replace the signed and unsigned
> > division library functions with an sdiv/udiv instruction.
> 
> [snip]
> 
> > diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> > index bd454b0..6ed6496 100644
> > --- a/arch/arm/lib/Makefile
> > +++ b/arch/arm/lib/Makefile
> > @@ -15,6 +15,9 @@ lib-y         := backtrace.o changebit.o csumipv6.o csumpartial.o   \
> >                    io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
> >                    call_with_stack.o
> >
> > +lib-$(CONFIG_CPU_V7) += div-v7.o
> > +CFLAGS_div-v7.o := -march=armv7-a
> 
> Won't this fail to build if the compiler doesn't have armv7-a support.
> Perhaps we don't care about compilers that old.

use the proper compiler to compile an armv7 kernel

Best Regards,
J.
> 
> Rob
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Christopher Covington Nov. 8, 2013, 4:48 p.m. UTC | #4
Hi Stephen,

On 11/07/2013 02:20 PM, Stephen Boyd wrote:
> If we're running on a v7 ARM CPU, detect if the CPU supports the
> sdiv/udiv instructions and replace the signed and unsigned
> division library functions with an sdiv/udiv instruction.

[...]

> +++ b/arch/arm/lib/div-v7.c
> @@ -0,0 +1,58 @@
> +/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/static_key.h>
> +
> +extern int ___aeabi_idiv(int, int);
> +extern unsigned ___aeabi_uidiv(int, int);

Why are the input parameters signed?

> +extern struct static_key cpu_has_idiv;
> +
> +int __aeabi_idiv(int numerator, int denominator)
> +{
> +	if (static_key_false(&cpu_has_idiv)) {
> +		int ret;
> +
> +		asm volatile (
> +		".arch_extension idiv\n"
> +		"sdiv %0, %1, %2"
> +		: "=&r" (ret)
> +		: "r" (numerator), "r" (denominator));
> +
> +		return ret;
> +	}
> +
> +	return ___aeabi_idiv(numerator, denominator);
> +}
> +
> +int __divsi3(int numerator, int denominator)
> +	__attribute__((alias("__aeabi_idiv")));
> +
> +unsigned __aeabi_uidiv(int numerator, int denominator)

Unsigned inputs?

> +{
> +	if (static_key_false(&cpu_has_idiv)) {
> +		int ret;
> +
> +		asm volatile (
> +		".arch_extension idiv\n"
> +		"udiv %0, %1, %2"
> +		: "=&r" (ret)
> +		: "r" (numerator), "r" (denominator));
> +
> +		return ret;
> +	}
> +
> +	return ___aeabi_uidiv(numerator, denominator);
> +}
> +
> +unsigned __udivsi3(int numerator, int denominator)
> +	__attribute__((alias("__aeabi_uidiv")));

Unsigned inputs?

[...]

Thanks,
Christopher
Russell King - ARM Linux Nov. 8, 2013, 4:52 p.m. UTC | #5
On Fri, Nov 08, 2013 at 10:58:42AM +0100, Jean-Christophe PLAGNIOL-VILLARD wrote:
> On 11:20 Thu 07 Nov     , Stephen Boyd wrote:
> > @@ -381,6 +384,13 @@ static void __init cpuid_init_hwcaps(void)
> >  		elf_hwcap |= HWCAP_IDIVT;
> >  	}
> >  
> > +#ifdef CONFIG_THUMB2_KERNEL
> if (IS_ENABLED(CONFIG_THUMB2_KERNEL) && elf_hwcap & HWCAP_IDIVT)
> > +	if (elf_hwcap & HWCAP_IDIVT)
> > +#else
> > +	if (elf_hwcap & HWCAP_IDIVA)
> > +#endif

Take another look, and you'll see the change that you're suggesting is
wrong.  Instead, the following may be a more reasonable suggestion as
a suitable replacement:

	if (elf_hwcap & (IS_ENABLED(CONFIG_THUMB2_KERNEL) ?
				HWCAP_IDIVT : HWCAP_IDIVA))
Russell King - ARM Linux Nov. 8, 2013, 4:54 p.m. UTC | #6
On Fri, Nov 08, 2013 at 12:50:04PM +0100, Jean-Christophe PLAGNIOL-VILLARD wrote:
> On 19:34 Thu 07 Nov     , Rob Herring wrote:
> > On Thu, Nov 7, 2013 at 1:20 PM, Stephen Boyd <sboyd@codeaurora.org> wrote:
> > > If we're running on a v7 ARM CPU, detect if the CPU supports the
> > > sdiv/udiv instructions and replace the signed and unsigned
> > > division library functions with an sdiv/udiv instruction.
> > 
> > [snip]
> > 
> > > diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> > > index bd454b0..6ed6496 100644
> > > --- a/arch/arm/lib/Makefile
> > > +++ b/arch/arm/lib/Makefile
> > > @@ -15,6 +15,9 @@ lib-y         := backtrace.o changebit.o csumipv6.o csumpartial.o   \
> > >                    io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
> > >                    call_with_stack.o
> > >
> > > +lib-$(CONFIG_CPU_V7) += div-v7.o
> > > +CFLAGS_div-v7.o := -march=armv7-a
> > 
> > Won't this fail to build if the compiler doesn't have armv7-a support.
> > Perhaps we don't care about compilers that old.
> 
> use the propoer compiler to compile a armv7 kernel

It's probably about time to get rid of the conditionals for this in
the main arch/arm/Makefile actually - some of those date back some
10 or so years.  That's something for the v3.14 merge window.
Måns Rullgård Nov. 8, 2013, 5:02 p.m. UTC | #7
Stephen Boyd <sboyd@codeaurora.org> writes:

> +int __aeabi_idiv(int numerator, int denominator)
> +{
> +	if (static_key_false(&cpu_has_idiv)) {
> +		int ret;
> +
> +		asm volatile (
> +		".arch_extension idiv\n"
> +		"sdiv %0, %1, %2"
> +		: "=&r" (ret)

There is no need for the & in the output constraint.  Dropping it allows
using one of the source registers as destination which may sometimes be
beneficial.

> +		: "r" (numerator), "r" (denominator));
> +
> +		return ret;
> +	}
> +
> +	return ___aeabi_idiv(numerator, denominator);
> +}
Stephen Boyd Nov. 8, 2013, 6:51 p.m. UTC | #8
On 11/08/13 08:54, Russell King - ARM Linux wrote:
> On Fri, Nov 08, 2013 at 12:50:04PM +0100, Jean-Christophe PLAGNIOL-VILLARD wrote:
>> On 19:34 Thu 07 Nov     , Rob Herring wrote:
>>> On Thu, Nov 7, 2013 at 1:20 PM, Stephen Boyd <sboyd@codeaurora.org> wrote:
>>>> If we're running on a v7 ARM CPU, detect if the CPU supports the
>>>> sdiv/udiv instructions and replace the signed and unsigned
>>>> division library functions with an sdiv/udiv instruction.
>>> [snip]
>>>
>>>> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
>>>> index bd454b0..6ed6496 100644
>>>> --- a/arch/arm/lib/Makefile
>>>> +++ b/arch/arm/lib/Makefile
>>>> @@ -15,6 +15,9 @@ lib-y         := backtrace.o changebit.o csumipv6.o csumpartial.o   \
>>>>                    io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
>>>>                    call_with_stack.o
>>>>
>>>> +lib-$(CONFIG_CPU_V7) += div-v7.o
>>>> +CFLAGS_div-v7.o := -march=armv7-a
>>> Won't this fail to build if the compiler doesn't have armv7-a support.
>>> Perhaps we don't care about compilers that old.
>> use the propoer compiler to compile a armv7 kernel
> It's probably about time to get rid of the conditionals for this in
> the main arch/arm/Makefile actually - some of those date back some
> 10 or so years.  That's something for the v3.14 merge window.

I'll take that as an endorsement for not falling back to -march=armv5t
-Wa,-march=armv7-a as is done in arch/arm/Makefile.
Stephen Boyd Nov. 8, 2013, 6:51 p.m. UTC | #9
On 11/08/13 08:48, Christopher Covington wrote:
> Hi Stephen,
>
> On 11/07/2013 02:20 PM, Stephen Boyd wrote:
>> If we're running on a v7 ARM CPU, detect if the CPU supports the
>> sdiv/udiv instructions and replace the signed and unsigned
>> division library functions with an sdiv/udiv instruction.
> [...]
>
>> +++ b/arch/arm/lib/div-v7.c
>> @@ -0,0 +1,58 @@
>> +/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 and
>> + * only version 2 as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#include <linux/static_key.h>
>> +
>> +extern int ___aeabi_idiv(int, int);
>> +extern unsigned ___aeabi_uidiv(int, int);
> Why are the input parameters signed?

Copy pasta. Fixed thanks.
Stephen Boyd Nov. 8, 2013, 6:53 p.m. UTC | #10
On 11/08/13 08:52, Russell King - ARM Linux wrote:
> On Fri, Nov 08, 2013 at 10:58:42AM +0100, Jean-Christophe PLAGNIOL-VILLARD wrote:
>> On 11:20 Thu 07 Nov     , Stephen Boyd wrote:
>>> @@ -381,6 +384,13 @@ static void __init cpuid_init_hwcaps(void)
>>>  		elf_hwcap |= HWCAP_IDIVT;
>>>  	}
>>>  
>>> +#ifdef CONFIG_THUMB2_KERNEL
>> if (IS_ENABLED(CONFIG_THUMB2_KERNEL) && elf_hwcap & HWCAP_IDIVT)
>>> +	if (elf_hwcap & HWCAP_IDIVT)
>>> +#else
>>> +	if (elf_hwcap & HWCAP_IDIVA)
>>> +#endif
> Take another look, and you'll see the change that you're suggesting is
> wrong.  Instead, the following may be a more reasonable suggestion as
> a suitable replacement:
>
> 	if (elf_hwcap & (IS_ENABLED(CONFIG_THUMB2_KERNEL) ?
> 				HWCAP_IDIVT : HWCAP_IDIVA))

I can use IS_ENABLED() but I'd prefer a local variable to make it
simpler in the conditional.
Stephen Boyd Nov. 8, 2013, 7:04 p.m. UTC | #11
On 11/08/13 09:02, Måns Rullgård wrote:
> Stephen Boyd <sboyd@codeaurora.org> writes:
>
>> +int __aeabi_idiv(int numerator, int denominator)
>> +{
>> +	if (static_key_false(&cpu_has_idiv)) {
>> +		int ret;
>> +
>> +		asm volatile (
>> +		".arch_extension idiv\n"
>> +		"sdiv %0, %1, %2"
>> +		: "=&r" (ret)
> There is no need for the & in the output constraint.  Dropping it allows
> using one of the source registers as destination which may sometimes be
> beneficial.

Ok. Thanks. That does seem to improve things.

before:

00000000 <__aeabi_idiv>:
   0:   e320f000        nop     {0}
   4:   eafffffe        b       0 <___aeabi_idiv>
   8:   e713f110        sdiv    r3, r0, r1
   c:   e1a00003        mov     r0, r3
  10:   e12fff1e        bx      lr

00000014 <__aeabi_uidiv>:
  14:   e320f000        nop     {0}
  18:   eafffffe        b       0 <___aeabi_uidiv>
  1c:   e733f110        udiv    r3, r0, r1
  20:   e1a00003        mov     r0, r3
  24:   e12fff1e        bx      lr

after:

00000000 <__aeabi_idiv>:
   0:   e320f000        nop     {0}
   4:   eafffffe        b       0 <___aeabi_idiv>
   8:   e710f110        sdiv    r0, r0, r1
   c:   e12fff1e        bx      lr

00000010 <__aeabi_uidiv>:
  10:   e320f000        nop     {0}
  14:   eafffffe        b       0 <___aeabi_uidiv>
  18:   e730f110        udiv    r0, r0, r1
  1c:   e12fff1e        bx      lr
diff mbox

Patch

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 0e1e2b3..7d519f4 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -30,6 +30,7 @@ 
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/sort.h>
+#include <linux/static_key.h>
 
 #include <asm/unified.h>
 #include <asm/cp15.h>
@@ -365,6 +366,8 @@  void __init early_print(const char *str, ...)
 	printk("%s", buf);
 }
 
+struct static_key cpu_has_idiv = STATIC_KEY_INIT_FALSE;
+
 static void __init cpuid_init_hwcaps(void)
 {
 	unsigned int divide_instrs, vmsa;
@@ -381,6 +384,13 @@  static void __init cpuid_init_hwcaps(void)
 		elf_hwcap |= HWCAP_IDIVT;
 	}
 
+#ifdef CONFIG_THUMB2_KERNEL
+	if (elf_hwcap & HWCAP_IDIVT)
+#else
+	if (elf_hwcap & HWCAP_IDIVA)
+#endif
+		static_key_slow_inc(&cpu_has_idiv);
+
 	/* LPAE implies atomic ldrd/strd instructions */
 	vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0;
 	if (vmsa >= 5)
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index bd454b0..6ed6496 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -15,6 +15,9 @@  lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
 		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
 		   call_with_stack.o
 
+lib-$(CONFIG_CPU_V7) += div-v7.o
+CFLAGS_div-v7.o := -march=armv7-a
+
 mmu-y	:= clear_user.o copy_page.o getuser.o putuser.o
 
 # the code in uaccess.S is not preemption safe and
diff --git a/arch/arm/lib/div-v7.c b/arch/arm/lib/div-v7.c
new file mode 100644
index 0000000..96ceb92
--- /dev/null
+++ b/arch/arm/lib/div-v7.c
@@ -0,0 +1,83 @@ 
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/static_key.h>
+
+/* Software division routines from lib1funcs.S, used as the fallback. */
+extern int ___aeabi_idiv(int, int);
+extern unsigned ___aeabi_uidiv(unsigned, unsigned);
+
+/* Defined in setup.c; enabled there when the CPU advertises sdiv/udiv. */
+extern struct static_key cpu_has_idiv;
+
+/*
+ * Signed division library function.
+ *
+ * Executes a single sdiv when cpu_has_idiv is enabled and otherwise falls
+ * back to the software routine ___aeabi_idiv().
+ *
+ * NOTE(review): the sdiv path does not call __div0(); whether a zero
+ * denominator should be reported here is still an open question.
+ */
+int __aeabi_idiv(int numerator, int denominator)
+{
+	if (static_key_false(&cpu_has_idiv)) {
+		int ret;
+
+		/* No early-clobber on %0: the result may reuse an input register. */
+		asm volatile (
+		".arch_extension idiv\n"
+		"sdiv %0, %1, %2"
+		: "=r" (ret)
+		: "r" (numerator), "r" (denominator));
+
+		return ret;
+	}
+
+	return ___aeabi_idiv(numerator, denominator);
+}
+
+/* __divsi3 is the same function under its other library name. */
+int __divsi3(int numerator, int denominator)
+	__attribute__((alias("__aeabi_idiv")));
+
+/*
+ * Unsigned division library function.
+ *
+ * Executes a single udiv when cpu_has_idiv is enabled and otherwise falls
+ * back to the software routine ___aeabi_uidiv().  Operands and result are
+ * unsigned throughout so the C prototypes match the operation performed.
+ *
+ * NOTE(review): as with __aeabi_idiv(), the udiv path does not call
+ * __div0() on a zero denominator.
+ */
+unsigned __aeabi_uidiv(unsigned numerator, unsigned denominator)
+{
+	if (static_key_false(&cpu_has_idiv)) {
+		unsigned ret;
+
+		/* No early-clobber on %0: the result may reuse an input register. */
+		asm volatile (
+		".arch_extension idiv\n"
+		"udiv %0, %1, %2"
+		: "=r" (ret)
+		: "r" (numerator), "r" (denominator));
+
+		return ret;
+	}
+
+	return ___aeabi_uidiv(numerator, denominator);
+}
+
+/* __udivsi3 is the same function under its other library name. */
+unsigned __udivsi3(unsigned numerator, unsigned denominator)
+	__attribute__((alias("__aeabi_uidiv")));
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index c562f64..adea088 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -205,8 +205,12 @@  Boston, MA 02111-1307, USA.  */
 .endm
 
 
+#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7)
 ENTRY(__udivsi3)
 ENTRY(__aeabi_uidiv)
+#else
+ENTRY(___aeabi_uidiv)
+#endif
 UNWIND(.fnstart)
 
 	subs	r2, r1, #1
@@ -232,8 +236,12 @@  UNWIND(.fnstart)
 	mov	pc, lr
 
 UNWIND(.fnend)
+#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7)
 ENDPROC(__udivsi3)
 ENDPROC(__aeabi_uidiv)
+#else
+ENDPROC(___aeabi_uidiv)
+#endif
 
 ENTRY(__umodsi3)
 UNWIND(.fnstart)
@@ -253,8 +261,12 @@  UNWIND(.fnstart)
 UNWIND(.fnend)
 ENDPROC(__umodsi3)
 
+#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7)
 ENTRY(__divsi3)
 ENTRY(__aeabi_idiv)
+#else
+ENTRY(___aeabi_idiv)
+#endif
 UNWIND(.fnstart)
 
 	cmp	r1, #0
@@ -293,8 +305,12 @@  UNWIND(.fnstart)
 	mov	pc, lr
 
 UNWIND(.fnend)
+#if defined(ZIMAGE) || !defined(CONFIG_CPU_V7)
 ENDPROC(__divsi3)
 ENDPROC(__aeabi_idiv)
+#else
+ENDPROC(___aeabi_idiv)
+#endif
 
 ENTRY(__modsi3)
 UNWIND(.fnstart)