diff mbox

[46/74] x86, lto: Disable fancy hweight optimizations for LTO

Message ID 1345345030-22211-47-git-send-email-andi@firstfloor.org (mailing list archive)
State New, archived
Headers show

Commit Message

Andi Kleen Aug. 19, 2012, 2:56 a.m. UTC
From: Andi Kleen <ak@linux.intel.com>

The fancy x86 hweight uses different compiler options for the
hweight file. This does not work with LTO. Just disable the optimization
with LTO

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/Kconfig                    |    5 +++--
 arch/x86/include/asm/arch_hweight.h |    9 +++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

Comments

Jan Beulich Aug. 19, 2012, 8:28 a.m. UTC | #1
>>> Andi Kleen <andi@firstfloor.org> 08/19/12 4:58 AM >>>
>--- a/arch/x86/Kconfig
>+++ b/arch/x86/Kconfig
>@@ -224,8 +224,9 @@ config X86_32_LAZY_GS
 >
>config ARCH_HWEIGHT_CFLAGS
>    string
>-    default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
>-    default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
>+    default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 && !LTO
>+    default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 && !LTO
>+    default "" if LTO
 
By moving this last line first you can avoid modifying the other two lines.

>--- a/arch/x86/include/asm/arch_hweight.h
>+++ b/arch/x86/include/asm/arch_hweight.h
>@@ -25,9 +25,14 @@ static inline unsigned int __arch_hweight32(unsigned int w)
>{
>    unsigned int res = 0;
 >
>+#ifdef CONFIG_LTO
>+    res  = __sw_hweight32(w);
>+#else
>+
>    asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
>             : "="REG_OUT (res)
>             : REG_IN (w));
>+#endif
 
Isn't this a little to harsh? Rather than not using popcnt at all, why don't you just add
the necessary clobbers to the asm() in the LTO case?

Jan

--
To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andi Kleen Aug. 19, 2012, 3:15 p.m. UTC | #2
> By moving this last line first you can avoid modifying the other two lines.

Ok.

> 
> >--- a/arch/x86/include/asm/arch_hweight.h
> >+++ b/arch/x86/include/asm/arch_hweight.h
> >@@ -25,9 +25,14 @@ static inline unsigned int __arch_hweight32(unsigned int w)
> >{
> >    unsigned int res = 0;
>  >
> >+#ifdef CONFIG_LTO
> >+    res  = __sw_hweight32(w);
> >+#else
> >+
> >    asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
> >             : "="REG_OUT (res)
> >             : REG_IN (w));
> >+#endif
>  
> Isn't this a little to harsh? Rather than not using popcnt at all, why don't you just add
> the necessary clobbers to the asm() in the LTO case?

gcc lacks the means to declare that a asm uses an external symbol
currently. Ok we could make it visible. But there's no way to make the
special calling convention work anyways, at least not without someone 
changing gcc to allow to declare this per function.

I'm not sure the optimization is really worth it anyways, hweight should
be uncommon.

-Andi
Avi Kivity Aug. 20, 2012, 9:15 a.m. UTC | #3
On 08/19/2012 05:56 AM, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> The fancy x86 hweight uses different compiler options for the
> hweight file. This does not work with LTO. Just disable the optimization
> with LTO
> 
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  arch/x86/Kconfig                    |    5 +++--
>  arch/x86/include/asm/arch_hweight.h |    9 +++++++++
>  2 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 8ec3a1a..9382b09 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -224,8 +224,9 @@ config X86_32_LAZY_GS
>  
>  config ARCH_HWEIGHT_CFLAGS
>  	string
> -	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
> -	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
> +	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 && !LTO
> +	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 && !LTO
> +	default "" if LTO
>  

Seems heavy handed.  How about using __attribute__((optimize(...))) instead?
Andi Kleen Aug. 20, 2012, 9:42 a.m. UTC | #4
> >  config ARCH_HWEIGHT_CFLAGS
> >  	string
> > -	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
> > -	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
> > +	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 && !LTO
> > +	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 && !LTO
> > +	default "" if LTO
> >  
> 
> Seems heavy handed.  How about using __attribute__((optimize(...))) instead?

Doesn't work for this. In fact according to the gcc developers that
attribute is mostly broken.

-Andi
Jan Beulich Aug. 20, 2012, 10:57 a.m. UTC | #5
>>> On 19.08.12 at 17:15, Andi Kleen <andi@firstfloor.org> wrote:
>> >--- a/arch/x86/include/asm/arch_hweight.h
>> >+++ b/arch/x86/include/asm/arch_hweight.h
>> >@@ -25,9 +25,14 @@ static inline unsigned int __arch_hweight32(unsigned int w)
>> >{
>> >    unsigned int res = 0;
>>  >
>> >+#ifdef CONFIG_LTO
>> >+    res  = __sw_hweight32(w);
>> >+#else
>> >+
>> >    asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
>> >             : "="REG_OUT (res)
>> >             : REG_IN (w));
>> >+#endif
>>  
>> Isn't this a little to harsh? Rather than not using popcnt at all, why don't 
>> you just add the necessary clobbers to the asm() in the LTO case?
> 
> gcc lacks the means to declare that a asm uses an external symbol
> currently. Ok we could make it visible. But there's no way to make the
> special calling convention work anyways, at least not without someone 
> changing gcc to allow to declare this per function.

That's not the point: The point really is that you could allow the
alternative regardless of LTO, and just penalize the LTO case
by having even the asm clobber the registers that a function call
would not preserve.

> I'm not sure the optimization is really worth it anyways, hweight should
> be uncommon.

That's a separate question (but I sort of agree - not sure whether
CPU mask weights ever get calculated on hot paths).

Jan

--
To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andi Kleen Aug. 20, 2012, 11:18 a.m. UTC | #6
> That's not the point: The point really is that you could allow the
> alternative regardless of LTO, and just penalize the LTO case
> by having even the asm clobber the registers that a function call
> would not preserve.

That's just what a normal call does, right?

-Andi
Jan Beulich Aug. 20, 2012, 12:38 p.m. UTC | #7
>>> On 20.08.12 at 13:18, Andi Kleen <ak@linux.intel.com> wrote:
>>  That's not the point: The point really is that you could allow the
>> alternative regardless of LTO, and just penalize the LTO case
>> by having even the asm clobber the registers that a function call
>> would not preserve.
> 
> That's just what a normal call does, right?

Exactly.

Jan

--
To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1a..9382b09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -224,8 +224,9 @@  config X86_32_LAZY_GS
 
 config ARCH_HWEIGHT_CFLAGS
 	string
-	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
-	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
+	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 && !LTO
+	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 && !LTO
+	default "" if LTO
 
 config ARCH_CPU_PROBE_RELEASE
 	def_bool y
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 9686c3d..ca80549 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -25,9 +25,14 @@  static inline unsigned int __arch_hweight32(unsigned int w)
 {
 	unsigned int res = 0;
 
+#ifdef CONFIG_LTO
+	res  = __sw_hweight32(w);
+#else
+
 	asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
 		     : "="REG_OUT (res)
 		     : REG_IN (w));
+#endif
 
 	return res;
 }
@@ -46,6 +51,9 @@  static inline unsigned long __arch_hweight64(__u64 w)
 {
 	unsigned long res = 0;
 
+#ifdef CONFIG_LTO
+	res = __sw_hweight64(w);
+#else
 #ifdef CONFIG_X86_32
 	return  __arch_hweight32((u32)w) +
 		__arch_hweight32((u32)(w >> 32));
@@ -54,6 +62,7 @@  static inline unsigned long __arch_hweight64(__u64 w)
 		     : "="REG_OUT (res)
 		     : REG_IN (w));
 #endif /* CONFIG_X86_32 */
+#endif
 
 	return res;
 }