Message ID | 20250331082251.3171276-11-xin@zytor.com (mailing list archive) |
---|---|
State | RFC |
Headers | show |
Series | MSR refactor with new MSR instructions support | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
On Mon, Mar 31, 2025 at 01:22:46AM -0700, Xin Li (Intel) wrote: > Signed-off-by: Xin Li (Intel) <xin@zytor.com> > --- > arch/x86/include/asm/msr-index.h | 6 ++++++ > arch/x86/kvm/vmx/vmenter.S | 28 ++++++++++++++++++++++++---- > 2 files changed, 30 insertions(+), 4 deletions(-) > > diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h > index e6134ef2263d..04244c3ba374 100644 > --- a/arch/x86/include/asm/msr-index.h > +++ b/arch/x86/include/asm/msr-index.h > @@ -1226,4 +1226,10 @@ > * a #GP > */ > > +/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */ > +#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6) > + > +/* Instruction opcode for the immediate form RDMSR/WRMSRNS */ > +#define ASM_WRMSRNS_RAX _ASM_BYTES(0xc4,0xe7,0x7a,0xf6,0xc0) > + > #endif /* _ASM_X86_MSR_INDEX_H */ > diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S > index f6986dee6f8c..9fae43723c44 100644 > --- a/arch/x86/kvm/vmx/vmenter.S > +++ b/arch/x86/kvm/vmx/vmenter.S > @@ -64,6 +64,29 @@ > RET > .endm > > +/* > + * Write EAX to MSR_IA32_SPEC_CTRL. > + * > + * Choose the best WRMSR instruction based on availability. > + * > + * Replace with 'wrmsrns' and 'wrmsrns %rax, $MSR_IA32_SPEC_CTRL' once binutils support them. 
> + */ > +.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL > + ALTERNATIVE_2 __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx; \ > + xor %edx, %edx; \ > + mov %edi, %eax; \ > + ds wrmsr), \ > + __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx; \ > + xor %edx, %edx; \ > + mov %edi, %eax; \ > + ASM_WRMSRNS), \ > + X86_FEATURE_WRMSRNS, \ > + __stringify(xor %_ASM_AX, %_ASM_AX; \ > + mov %edi, %eax; \ > + ASM_WRMSRNS_RAX; .long MSR_IA32_SPEC_CTRL), \ > + X86_FEATURE_MSR_IMM > +.endm > + > .section .noinstr.text, "ax" > > /** > @@ -123,10 +146,7 @@ SYM_FUNC_START(__vmx_vcpu_run) > movl PER_CPU_VAR(x86_spec_ctrl_current), %esi > cmp %edi, %esi > je .Lspec_ctrl_done > - mov $MSR_IA32_SPEC_CTRL, %ecx > - xor %edx, %edx > - mov %edi, %eax > - wrmsr Is that the right path forward? That is replace the MSR write to disable speculative execution with a non-serialized WRMSR? Doesn't that mean the WRMSRNS is speculative? > + WRITE_EAX_TO_MSR_IA32_SPEC_CTRL > > .Lspec_ctrl_done: > > -- > 2.49.0 > >
On Mon, Mar 31, 2025 at 04:27:23PM -0400, Konrad Rzeszutek Wilk wrote: > Is that the right path forward? > > That is replace the MSR write to disable speculative execution with a > non-serialized WRMSR? Doesn't that mean the WRMSRNS is speculative? Ha, interesting question. If the WRMSR is non-serializing, when do speculative things like indirect branches and the like get *actually* cleared and can such a speculation window be used to leak branch data even if IBRS is actually enabled for example... Fun. This change needs to be run by hw folks and I guess until then WRMSRNS should not get anywhere near mitigation MSRs like SPEC_CTRL or PRED_CMD... Thx.
On 31/03/2025 9:27 pm, Konrad Rzeszutek Wilk wrote: > On Mon, Mar 31, 2025 at 01:22:46AM -0700, Xin Li (Intel) wrote: >> Signed-off-by: Xin Li (Intel) <xin@zytor.com> >> --- >> arch/x86/include/asm/msr-index.h | 6 ++++++ >> arch/x86/kvm/vmx/vmenter.S | 28 ++++++++++++++++++++++++---- >> 2 files changed, 30 insertions(+), 4 deletions(-) >> >> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h >> index e6134ef2263d..04244c3ba374 100644 >> --- a/arch/x86/include/asm/msr-index.h >> +++ b/arch/x86/include/asm/msr-index.h >> @@ -1226,4 +1226,10 @@ >> * a #GP >> */ >> >> +/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */ >> +#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6) >> + >> +/* Instruction opcode for the immediate form RDMSR/WRMSRNS */ >> +#define ASM_WRMSRNS_RAX _ASM_BYTES(0xc4,0xe7,0x7a,0xf6,0xc0) >> + >> #endif /* _ASM_X86_MSR_INDEX_H */ >> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S >> index f6986dee6f8c..9fae43723c44 100644 >> --- a/arch/x86/kvm/vmx/vmenter.S >> +++ b/arch/x86/kvm/vmx/vmenter.S >> @@ -64,6 +64,29 @@ >> RET >> .endm >> >> +/* >> + * Write EAX to MSR_IA32_SPEC_CTRL. >> + * >> + * Choose the best WRMSR instruction based on availability. >> + * >> + * Replace with 'wrmsrns' and 'wrmsrns %rax, $MSR_IA32_SPEC_CTRL' once binutils support them. 
>> + */ >> +.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL >> + ALTERNATIVE_2 __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx; \ >> + xor %edx, %edx; \ >> + mov %edi, %eax; \ >> + ds wrmsr), \ >> + __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx; \ >> + xor %edx, %edx; \ >> + mov %edi, %eax; \ >> + ASM_WRMSRNS), \ >> + X86_FEATURE_WRMSRNS, \ >> + __stringify(xor %_ASM_AX, %_ASM_AX; \ >> + mov %edi, %eax; \ >> + ASM_WRMSRNS_RAX; .long MSR_IA32_SPEC_CTRL), \ >> + X86_FEATURE_MSR_IMM >> +.endm >> + >> .section .noinstr.text, "ax" >> >> /** >> @@ -123,10 +146,7 @@ SYM_FUNC_START(__vmx_vcpu_run) >> movl PER_CPU_VAR(x86_spec_ctrl_current), %esi >> cmp %edi, %esi >> je .Lspec_ctrl_done >> - mov $MSR_IA32_SPEC_CTRL, %ecx >> - xor %edx, %edx >> - mov %edi, %eax >> - wrmsr > Is that the right path forward? > > That is replace the MSR write to disable speculative execution with a > non-serialized WRMSR? Doesn't that mean the WRMSRNS is speculative? MSR_SPEC_CTRL is explicitly non-serialising, even with a plain WRMSR. non-serialising != non-speculative. Although WRMSRNS's precise statement on the matter of non-speculativeness is woolly, given an intent to optimise it some more in the future. ~Andrew
On March 31, 2025 1:27:23 PM PDT, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote: >On Mon, Mar 31, 2025 at 01:22:46AM -0700, Xin Li (Intel) wrote: >> Signed-off-by: Xin Li (Intel) <xin@zytor.com> >> --- >> arch/x86/include/asm/msr-index.h | 6 ++++++ >> arch/x86/kvm/vmx/vmenter.S | 28 ++++++++++++++++++++++++---- >> 2 files changed, 30 insertions(+), 4 deletions(-) >> >> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h >> index e6134ef2263d..04244c3ba374 100644 >> --- a/arch/x86/include/asm/msr-index.h >> +++ b/arch/x86/include/asm/msr-index.h >> @@ -1226,4 +1226,10 @@ >> * a #GP >> */ >> >> +/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */ >> +#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6) >> + >> +/* Instruction opcode for the immediate form RDMSR/WRMSRNS */ >> +#define ASM_WRMSRNS_RAX _ASM_BYTES(0xc4,0xe7,0x7a,0xf6,0xc0) >> + >> #endif /* _ASM_X86_MSR_INDEX_H */ >> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S >> index f6986dee6f8c..9fae43723c44 100644 >> --- a/arch/x86/kvm/vmx/vmenter.S >> +++ b/arch/x86/kvm/vmx/vmenter.S >> @@ -64,6 +64,29 @@ >> RET >> .endm >> >> +/* >> + * Write EAX to MSR_IA32_SPEC_CTRL. >> + * >> + * Choose the best WRMSR instruction based on availability. >> + * >> + * Replace with 'wrmsrns' and 'wrmsrns %rax, $MSR_IA32_SPEC_CTRL' once binutils support them. 
>> + */ >> +.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL >> + ALTERNATIVE_2 __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx; \ >> + xor %edx, %edx; \ >> + mov %edi, %eax; \ >> + ds wrmsr), \ >> + __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx; \ >> + xor %edx, %edx; \ >> + mov %edi, %eax; \ >> + ASM_WRMSRNS), \ >> + X86_FEATURE_WRMSRNS, \ >> + __stringify(xor %_ASM_AX, %_ASM_AX; \ >> + mov %edi, %eax; \ >> + ASM_WRMSRNS_RAX; .long MSR_IA32_SPEC_CTRL), \ >> + X86_FEATURE_MSR_IMM >> +.endm >> + >> .section .noinstr.text, "ax" >> >> /** >> @@ -123,10 +146,7 @@ SYM_FUNC_START(__vmx_vcpu_run) >> movl PER_CPU_VAR(x86_spec_ctrl_current), %esi >> cmp %edi, %esi >> je .Lspec_ctrl_done >> - mov $MSR_IA32_SPEC_CTRL, %ecx >> - xor %edx, %edx >> - mov %edi, %eax >> - wrmsr > >Is that the right path forward? > >That is replace the MSR write to disable speculative execution with a >non-serialized WRMSR? Doesn't that mean the WRMSRNS is speculative? > > >> + WRITE_EAX_TO_MSR_IA32_SPEC_CTRL >> >> .Lspec_ctrl_done: >> >> -- >> 2.49.0 >> >> So to clarify the semantics of WRMSRNS: it is an opt-in capability for the OS to allow the hardware to choose the amount of serialization needed on an MSR- or implementation-specific basis. It also allows the OS to set multiple MSRs followed by a SERIALIZE instruction if full hard serialization is still desired, rather than having each individual MSR write do a full hard serialization (killing the full pipe and starting over from instruction fetch.) This will replace the – architecturally questionable, in my opinion – practice of introducing non-serializing MSRs which after all are retroactive changes to the semantics of the WRMSR instruction with no opt-out (although the existence of SERIALIZE improves the situation somewhat.) I agree that we need better documentation as to the semantics of WRMSRNS on critical MSRs like SPEC_CTRL, and especially in that specific case, when post-batch SERIALIZE would be called for.
On 3/31/25 13:41, Andrew Cooper wrote: >> >> That is replace the MSR write to disable speculative execution with a >> non-serialized WRMSR? Doesn't that mean the WRMSRNS is speculative? > > MSR_SPEC_CTRL is explicitly non-serialising, even with a plain WRMSR. > > non-serialising != non-speculative. > > Although WRMSRNS's precise statement on the matter of > non-speculativeness is woolly, given an intent to optimise it some more > in the future. > To be specific, "serializing" is a much harder statement than "non-speculative." For architecturally non-serializing MSRs, WRMSRNS and WRMSR are equivalent (or to put it differently, WRMSR acts like WRMSRNS.) The advantage with making them explicitly WRMSRNS is that it allows for the substitution of the upcoming immediate forms. -hpa
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e6134ef2263d..04244c3ba374 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1226,4 +1226,10 @@ * a #GP */ +/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */ +#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6) + +/* Instruction opcode for the immediate form RDMSR/WRMSRNS */ +#define ASM_WRMSRNS_RAX _ASM_BYTES(0xc4,0xe7,0x7a,0xf6,0xc0) + #endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index f6986dee6f8c..9fae43723c44 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -64,6 +64,29 @@ RET .endm +/* + * Write EAX to MSR_IA32_SPEC_CTRL. + * + * Choose the best WRMSR instruction based on availability. + * + * Replace with 'wrmsrns' and 'wrmsrns %rax, $MSR_IA32_SPEC_CTRL' once binutils support them. + */ +.macro WRITE_EAX_TO_MSR_IA32_SPEC_CTRL + ALTERNATIVE_2 __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx; \ + xor %edx, %edx; \ + mov %edi, %eax; \ + ds wrmsr), \ + __stringify(mov $MSR_IA32_SPEC_CTRL, %ecx; \ + xor %edx, %edx; \ + mov %edi, %eax; \ + ASM_WRMSRNS), \ + X86_FEATURE_WRMSRNS, \ + __stringify(xor %_ASM_AX, %_ASM_AX; \ + mov %edi, %eax; \ + ASM_WRMSRNS_RAX; .long MSR_IA32_SPEC_CTRL), \ + X86_FEATURE_MSR_IMM +.endm + .section .noinstr.text, "ax" /** @@ -123,10 +146,7 @@ SYM_FUNC_START(__vmx_vcpu_run) movl PER_CPU_VAR(x86_spec_ctrl_current), %esi cmp %edi, %esi je .Lspec_ctrl_done - mov $MSR_IA32_SPEC_CTRL, %ecx - xor %edx, %edx - mov %edi, %eax - wrmsr + WRITE_EAX_TO_MSR_IA32_SPEC_CTRL .Lspec_ctrl_done:
Signed-off-by: Xin Li (Intel) <xin@zytor.com> --- arch/x86/include/asm/msr-index.h | 6 ++++++ arch/x86/kvm/vmx/vmenter.S | 28 ++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-)