diff mbox series

[v2,2/5] x86/idle: Disable IBRS when cpu is offline

Message ID 20230620140625.1001886-3-longman@redhat.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series x86/speculation: Disable IBRS when idle | expand

Commit Message

Waiman Long June 20, 2023, 2:06 p.m. UTC
Commit bf5835bcdb96 ("intel_idle: Disable IBRS during long idle")
disables IBRS when the CPU enters long idle. However, when a CPU becomes
offline, the IBRS bit is still set when X86_FEATURE_KERNEL_IBRS is
enabled. That will impact the performance of a sibling CPU. Mitigate
this performance impact by clearing all the mitigation bits in SPEC_CTRL
MSR when offline and restoring the value of the MSR when it becomes
online again.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 arch/x86/kernel/smpboot.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

Comments

Peter Zijlstra June 21, 2023, 7:23 a.m. UTC | #1
On Tue, Jun 20, 2023 at 10:06:22AM -0400, Waiman Long wrote:
> Commit bf5835bcdb96 ("intel_idle: Disable IBRS during long idle")
> disables IBRS when the CPU enters long idle. However, when a CPU becomes
> offline, the IBRS bit is still set when X86_FEATURE_KERNEL_IBRS is
> enabled. That will impact the performance of a sibling CPU. Mitigate
> this performance impact by clearing all the mitigation bits in SPEC_CTRL
> MSR when offline and restoring the value of the MSR when it becomes
> online again.
> 
> Signed-off-by: Waiman Long <longman@redhat.com>
> ---
>  arch/x86/kernel/smpboot.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 352f0ce1ece4..5ff82fef413c 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -84,6 +84,7 @@
>  #include <asm/hw_irq.h>
>  #include <asm/stackprotector.h>
>  #include <asm/sev.h>
> +#include <asm/nospec-branch.h>
>  
>  /* representing HT siblings of each logical CPU */
>  DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
> @@ -1838,12 +1839,24 @@ void __noreturn hlt_play_dead(void)
>  
>  void native_play_dead(void)
>  {
> +	u64 spec_ctrl = spec_ctrl_current();
> +
> +	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
> +		this_cpu_write(x86_spec_ctrl_current, 0);
> +		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> +	}
> +
>  	play_dead_common();
>  	tboot_shutdown(TB_SHUTDOWN_WFS);
>  
>  	mwait_play_dead();
>  	if (cpuidle_play_dead())
>  		hlt_play_dead();
> +
> +	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
> +		native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
> +		this_cpu_write(x86_spec_ctrl_current, spec_ctrl);
> +	}
>  }

play_dead() is marked __noreturn
Waiman Long June 21, 2023, 1:59 p.m. UTC | #2
On 6/21/23 03:23, Peter Zijlstra wrote:
> On Tue, Jun 20, 2023 at 10:06:22AM -0400, Waiman Long wrote:
>> Commit bf5835bcdb96 ("intel_idle: Disable IBRS during long idle")
>> disables IBRS when the CPU enters long idle. However, when a CPU becomes
>> offline, the IBRS bit is still set when X86_FEATURE_KERNEL_IBRS is
>> enabled. That will impact the performance of a sibling CPU. Mitigate
>> this performance impact by clearing all the mitigation bits in SPEC_CTRL
>> MSR when offline and restoring the value of the MSR when it becomes
>> online again.
>>
>> Signed-off-by: Waiman Long <longman@redhat.com>
>> ---
>>   arch/x86/kernel/smpboot.c | 13 +++++++++++++
>>   1 file changed, 13 insertions(+)
>>
>> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
>> index 352f0ce1ece4..5ff82fef413c 100644
>> --- a/arch/x86/kernel/smpboot.c
>> +++ b/arch/x86/kernel/smpboot.c
>> @@ -84,6 +84,7 @@
>>   #include <asm/hw_irq.h>
>>   #include <asm/stackprotector.h>
>>   #include <asm/sev.h>
>> +#include <asm/nospec-branch.h>
>>   
>>   /* representing HT siblings of each logical CPU */
>>   DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
>> @@ -1838,12 +1839,24 @@ void __noreturn hlt_play_dead(void)
>>   
>>   void native_play_dead(void)
>>   {
>> +	u64 spec_ctrl = spec_ctrl_current();
>> +
>> +	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
>> +		this_cpu_write(x86_spec_ctrl_current, 0);
>> +		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>> +	}
>> +
>>   	play_dead_common();
>>   	tboot_shutdown(TB_SHUTDOWN_WFS);
>>   
>>   	mwait_play_dead();
>>   	if (cpuidle_play_dead())
>>   		hlt_play_dead();
>> +
>> +	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
>> +		native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
>> +		this_cpu_write(x86_spec_ctrl_current, spec_ctrl);
>> +	}
>>   }
> play_dead() is marked __noreturn

There are different versions of play_dead() in the kernel. Some of them 
are indeed marked __noreturn like the non-SMP one in 
arch/x86/kernel/process.c. The native_play_dead() that I am patching 
isn't one of those.

Cheers,
Longman
Peter Zijlstra June 21, 2023, 2:36 p.m. UTC | #3
On Wed, Jun 21, 2023 at 09:59:52AM -0400, Waiman Long wrote:
> 
> On 6/21/23 03:23, Peter Zijlstra wrote:
> > On Tue, Jun 20, 2023 at 10:06:22AM -0400, Waiman Long wrote:
> > > Commit bf5835bcdb96 ("intel_idle: Disable IBRS during long idle")
> > > disables IBRS when the CPU enters long idle. However, when a CPU becomes
> > > offline, the IBRS bit is still set when X86_FEATURE_KERNEL_IBRS is
> > > enabled. That will impact the performance of a sibling CPU. Mitigate
> > > this performance impact by clearing all the mitigation bits in SPEC_CTRL
> > > MSR when offline and restoring the value of the MSR when it becomes
> > > online again.
> > > 
> > > Signed-off-by: Waiman Long <longman@redhat.com>
> > > ---
> > >   arch/x86/kernel/smpboot.c | 13 +++++++++++++
> > >   1 file changed, 13 insertions(+)
> > > 
> > > diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> > > index 352f0ce1ece4..5ff82fef413c 100644
> > > --- a/arch/x86/kernel/smpboot.c
> > > +++ b/arch/x86/kernel/smpboot.c
> > > @@ -84,6 +84,7 @@
> > >   #include <asm/hw_irq.h>
> > >   #include <asm/stackprotector.h>
> > >   #include <asm/sev.h>
> > > +#include <asm/nospec-branch.h>
> > >   /* representing HT siblings of each logical CPU */
> > >   DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
> > > @@ -1838,12 +1839,24 @@ void __noreturn hlt_play_dead(void)
> > >   void native_play_dead(void)
> > >   {
> > > +	u64 spec_ctrl = spec_ctrl_current();
> > > +
> > > +	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
> > > +		this_cpu_write(x86_spec_ctrl_current, 0);
> > > +		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> > > +	}
> > > +
> > >   	play_dead_common();
> > >   	tboot_shutdown(TB_SHUTDOWN_WFS);
> > >   	mwait_play_dead();
> > >   	if (cpuidle_play_dead())
> > >   		hlt_play_dead();
> > > +
> > > +	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
> > > +		native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
> > > +		this_cpu_write(x86_spec_ctrl_current, spec_ctrl);
> > > +	}
> > >   }
> > play_dead() is marked __noreturn
> 
> There are different versions of play_dead() in the kernel. Some of them are
> indeed marked __noreturn like the non-SMP one in arch/x86/kernel/process.c.
> The native_play_dead() that I am patching isn't one of those.

mostly by accident I think, hlt_play_dead() is, so I'm thinking
everybody (all compiler and objtool) managed to figure out
native_play_dead() is __noreturn too.
Waiman Long June 21, 2023, 2:44 p.m. UTC | #4
On 6/21/23 10:36, Peter Zijlstra wrote:
> On Wed, Jun 21, 2023 at 09:59:52AM -0400, Waiman Long wrote:
>> On 6/21/23 03:23, Peter Zijlstra wrote:
>>> On Tue, Jun 20, 2023 at 10:06:22AM -0400, Waiman Long wrote:
>>>> Commit bf5835bcdb96 ("intel_idle: Disable IBRS during long idle")
>>>> disables IBRS when the CPU enters long idle. However, when a CPU becomes
>>>> offline, the IBRS bit is still set when X86_FEATURE_KERNEL_IBRS is
>>>> enabled. That will impact the performance of a sibling CPU. Mitigate
>>>> this performance impact by clearing all the mitigation bits in SPEC_CTRL
>>>> MSR when offline and restoring the value of the MSR when it becomes
>>>> online again.
>>>>
>>>> Signed-off-by: Waiman Long <longman@redhat.com>
>>>> ---
>>>>    arch/x86/kernel/smpboot.c | 13 +++++++++++++
>>>>    1 file changed, 13 insertions(+)
>>>>
>>>> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
>>>> index 352f0ce1ece4..5ff82fef413c 100644
>>>> --- a/arch/x86/kernel/smpboot.c
>>>> +++ b/arch/x86/kernel/smpboot.c
>>>> @@ -84,6 +84,7 @@
>>>>    #include <asm/hw_irq.h>
>>>>    #include <asm/stackprotector.h>
>>>>    #include <asm/sev.h>
>>>> +#include <asm/nospec-branch.h>
>>>>    /* representing HT siblings of each logical CPU */
>>>>    DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
>>>> @@ -1838,12 +1839,24 @@ void __noreturn hlt_play_dead(void)
>>>>    void native_play_dead(void)
>>>>    {
>>>> +	u64 spec_ctrl = spec_ctrl_current();
>>>> +
>>>> +	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
>>>> +		this_cpu_write(x86_spec_ctrl_current, 0);
>>>> +		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>>>> +	}
>>>> +
>>>>    	play_dead_common();
>>>>    	tboot_shutdown(TB_SHUTDOWN_WFS);
>>>>    	mwait_play_dead();
>>>>    	if (cpuidle_play_dead())
>>>>    		hlt_play_dead();
>>>> +
>>>> +	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
>>>> +		native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
>>>> +		this_cpu_write(x86_spec_ctrl_current, spec_ctrl);
>>>> +	}
>>>>    }
>>> play_dead() is marked __noreturn
>> There are different versions of play_dead() in the kernel. Some of them are
>> indeed marked __noreturn like the non-SMP one in arch/x86/kernel/process.c.
>> The native_play_dead() that I am patching isn't one of those.
> mostly by accident I think, hlt_play_dead() is, so I'm thinking
> everybody (all compiler and objtool) managed to figure out
> native_play_dead() is __noreturn too.

Well, hlt_play_dead() is only called if cpuidle_play_dead() returns an 
error which is not the typical case. My testing does confirm that this 
patch is able to keep the IBRS bit off when a CPU is offline via its 
online sysfs file.

Cheers,
Longman
Peter Zijlstra June 21, 2023, 2:48 p.m. UTC | #5
On Wed, Jun 21, 2023 at 10:44:23AM -0400, Waiman Long wrote:

> Well, hlt_play_dead() is only called if cpuidle_play_dead() returns an error
> which is not the typical case. My testing does confirm that this patch is
> able to keep the IBRS bit off when a CPU is offline via its online sysfs
> file.

The point is; your re-enable IBRS hunk at the end is dead-code. It
should never ever run and having it is confusing.
Waiman Long June 21, 2023, 2:51 p.m. UTC | #6
On 6/21/23 10:48, Peter Zijlstra wrote:
> On Wed, Jun 21, 2023 at 10:44:23AM -0400, Waiman Long wrote:
>
>> Well, hlt_play_dead() is only called if cpuidle_play_dead() returns an error
>> which is not the typical case. My testing does confirm that this patch is
>> able to keep the IBRS bit off when a CPU is offline via its online sysfs
>> file.
> The point is; your re-enable IBRS hunk at the end is dead-code. It
> should never ever run and having it is confusing.

What I meant is that hlt_play_dead() should never be called unless there 
is some serious problem with the system and native_play_dead() does 
return in normal usage.

Cheers,
Longman
Peter Zijlstra June 21, 2023, 2:54 p.m. UTC | #7
On Wed, Jun 21, 2023 at 10:51:33AM -0400, Waiman Long wrote:
> 
> On 6/21/23 10:48, Peter Zijlstra wrote:
> > On Wed, Jun 21, 2023 at 10:44:23AM -0400, Waiman Long wrote:
> > 
> > > Well, hlt_play_dead() is only called if cpuidle_play_dead() returns an error
> > > which is not the typical case. My testing does confirm that this patch is
> > > able to keep the IBRS bit off when a CPU is offline via its online sysfs
> > > file.
> > The point is; your re-enable IBRS hunk at the end is dead-code. It
> > should never ever run and having it is confusing.
> 
> What I meant is that hlt_play_dead() should never be called unless there is
> some serious problem with the system and native_play_dead() does return in
> normal usage.

This is all through arch_cpu_idle_dead() which is __noreturn. And no,
none of this must ever return.

If you want an offline CPU to come back, you re-init.
Waiman Long June 21, 2023, 3:42 p.m. UTC | #8
On 6/21/23 10:54, Peter Zijlstra wrote:
> On Wed, Jun 21, 2023 at 10:51:33AM -0400, Waiman Long wrote:
>> On 6/21/23 10:48, Peter Zijlstra wrote:
>>> On Wed, Jun 21, 2023 at 10:44:23AM -0400, Waiman Long wrote:
>>>
>>>> Well, hlt_play_dead() is only called if cpuidle_play_dead() returns an error
>>>> which is not the typical case. My testing does confirm that this patch is
>>>> able to keep the IBRS bit off when a CPU is offline via its online sysfs
>>>> file.
>>> The point is; your re-enable IBRS hunk at the end is dead-code. It
>>> should never ever run and having it is confusing.
>> What I meant is that hlt_play_dead() should never be called unless there is
>> some serious problem with the system and native_play_dead() does return in
>> normal usage.
> This is all through arch_cpu_idle_dead() which is __noreturn. And no,
> none of this must ever return.
>
> If you want an offline CPU to come back, you re-init.

Yes, you are right. I thought it will return. I will update the patch 
accordingly.

Thanks,
Longman
diff mbox series

Patch

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 352f0ce1ece4..5ff82fef413c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -84,6 +84,7 @@ 
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
 #include <asm/sev.h>
+#include <asm/nospec-branch.h>
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -1838,12 +1839,24 @@  void __noreturn hlt_play_dead(void)
 
 void native_play_dead(void)
 {
+	u64 spec_ctrl = spec_ctrl_current();
+
+	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
+		this_cpu_write(x86_spec_ctrl_current, 0);
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+	}
+
 	play_dead_common();
 	tboot_shutdown(TB_SHUTDOWN_WFS);
 
 	mwait_play_dead();
 	if (cpuidle_play_dead())
 		hlt_play_dead();
+
+	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) {
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+		this_cpu_write(x86_spec_ctrl_current, spec_ctrl);
+	}
 }
 
 #else /* ... !CONFIG_HOTPLUG_CPU */