diff mbox

[1/3] x86/kvm/vmx: read MSR_FS_BASE from current->thread

Message ID 20180312140300.6166-2-vkuznets@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vitaly Kuznetsov March 12, 2018, 2:02 p.m. UTC
vmx_save_host_state() is only called from kvm_arch_vcpu_ioctl_run() so
the context is pretty well defined. Read MSR_FS_BASE from
current->thread.fsbase after calling save_fsgs() which takes care of
X86_BUG_NULL_SEG case now and will do RD[FG,GS]BASE when FSGSBASE
extensions are exposed to userspace (currently they are not).

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/processor.h |  3 +++
 arch/x86/kernel/process_64.c     | 20 ++++++++++++++++++++
 arch/x86/kvm/vmx.c               |  4 +++-
 3 files changed, 26 insertions(+), 1 deletion(-)

Comments

Andy Lutomirski March 12, 2018, 3:34 p.m. UTC | #1
On Mon, Mar 12, 2018 at 2:02 PM, Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> vmx_save_host_state() is only called from kvm_arch_vcpu_ioctl_run() so
> the context is pretty well defined. Read MSR_FS_BASE from
> current->thread.fsbase after calling save_fsgs() which takes care of
> X86_BUG_NULL_SEG case now and will do RD[FG,GS]BASE when FSGSBASE
> extensions are exposed to userspace (currently they are not).
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/include/asm/processor.h |  3 +++
>  arch/x86/kernel/process_64.c     | 20 ++++++++++++++++++++
>  arch/x86/kvm/vmx.c               |  4 +++-
>  3 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index b0ccd4847a58..006352b85ba3 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -410,6 +410,9 @@ DECLARE_INIT_PER_CPU(irq_stack_union);
>  DECLARE_PER_CPU(char *, irq_stack_ptr);
>  DECLARE_PER_CPU(unsigned int, irq_count);
>  extern asmlinkage void ignore_sysret(void);
> +
> +/* Save actual FS/GS selectors and bases to current->thread */
> +void save_current_fsgs(void);
>  #else  /* X86_64 */
>  #ifdef CONFIG_CC_STACKPROTECTOR
>  /*
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 9eb448c7859d..eb907fefe02e 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -205,6 +205,26 @@ static __always_inline void save_fsgs(struct task_struct *task)
>         save_base_legacy(task, task->thread.gsindex, GS);
>  }
>
> +/*
> + * Currently, the only way for processes to change their FS/GS base is to call
> + * ARCH_SET_FS/GS prctls and these reflect changes they make in task->thread.
> + * There are, however, additional considerations:
> + *
> + * There is X86_BUG_NULL_SEG: on some CPUs writing '0' to FS/GS selectors zeroes
> + * the base and on some it doesn't, we need to check for that
> + * (see save_base_legacy()).
> + *
> + * When FSGSBASE extensions are enabled userspace processes will be able to
> + * change their FS/GS bases without kernel intervention. save_fsgs() will
> + * have to be updated to actually read FS and GS bases with RD[FG,GS]BASE
> + * instructions.
> + */

This is all a very complicated way to say "while a process is running,
current->thread.fsbase and current->thread.gsbase may not match the
corresponding CPU registers.  KVM wants an efficient way to save and
restore FSBASE and GSBASE."

And how about changing this to:

#if IS_ENABLED(CONFIG_KVM)
void save_fsgs_for_kvm(void)
{
    save_fsgs(current);
}
EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);

--Andy
Vitaly Kuznetsov March 12, 2018, 3:55 p.m. UTC | #2
Andy Lutomirski <luto@kernel.org> writes:

> On Mon, Mar 12, 2018 at 2:02 PM, Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>> vmx_save_host_state() is only called from kvm_arch_vcpu_ioctl_run() so
>> the context is pretty well defined. Read MSR_FS_BASE from
>> current->thread.fsbase after calling save_fsgs() which takes care of
>> X86_BUG_NULL_SEG case now and will do RD[FG,GS]BASE when FSGSBASE
>> extensions are exposed to userspace (currently they are not).
>>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>> ---
>>  arch/x86/include/asm/processor.h |  3 +++
>>  arch/x86/kernel/process_64.c     | 20 ++++++++++++++++++++
>>  arch/x86/kvm/vmx.c               |  4 +++-
>>  3 files changed, 26 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
>> index b0ccd4847a58..006352b85ba3 100644
>> --- a/arch/x86/include/asm/processor.h
>> +++ b/arch/x86/include/asm/processor.h
>> @@ -410,6 +410,9 @@ DECLARE_INIT_PER_CPU(irq_stack_union);
>>  DECLARE_PER_CPU(char *, irq_stack_ptr);
>>  DECLARE_PER_CPU(unsigned int, irq_count);
>>  extern asmlinkage void ignore_sysret(void);
>> +
>> +/* Save actual FS/GS selectors and bases to current->thread */
>> +void save_current_fsgs(void);
>>  #else  /* X86_64 */
>>  #ifdef CONFIG_CC_STACKPROTECTOR
>>  /*
>> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
>> index 9eb448c7859d..eb907fefe02e 100644
>> --- a/arch/x86/kernel/process_64.c
>> +++ b/arch/x86/kernel/process_64.c
>> @@ -205,6 +205,26 @@ static __always_inline void save_fsgs(struct task_struct *task)
>>         save_base_legacy(task, task->thread.gsindex, GS);
>>  }
>>
>> +/*
>> + * Currently, the only way for processes to change their FS/GS base is to call
>> + * ARCH_SET_FS/GS prctls and these reflect changes they make in task->thread.
>> + * There are, however, additional considerations:
>> + *
>> + * There is X86_BUG_NULL_SEG: on some CPUs writing '0' to FS/GS selectors zeroes
>> + * the base and on some it doesn't, we need to check for that
>> + * (see save_base_legacy()).
>> + *
>> + * When FSGSBASE extensions are enabled userspace processes will be able to
>> + * change their FS/GS bases without kernel intervention. save_fsgs() will
>> + * have to be updated to actually read FS and GS bases with RD[FG,GS]BASE
>> + * instructions.
>> + */
>
> This is all a very complicated way to say "while a process is running,
> current->thread.fsbase and current->thread.gsbase may not match the
> corresponding CPU registers.  KVM wants an efficient way to save and
> restore FSBASE and GSBASE."
>

Well, I though it is not really obvious why "current->thread.fsbase and
current->thread.gsbase may not match the corresponding CPU registers"
:-) but save_base_legacy() is really nearby so I'll trim my comment in
v2.

> And how about changing this to:
>
> #if IS_ENABLED(CONFIG_KVM)
> void save_fsgs_for_kvm(void)
> {
>     save_fsgs(current);
> }
> EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
>

Sure. Actually, there is nothing KVM-specific in this function but KVM
will probably be the only user for the time being.
Paolo Bonzini March 12, 2018, 4:03 p.m. UTC | #3
On 12/03/2018 15:02, Vitaly Kuznetsov wrote:
>  
> +/*
> + * Currently, the only way for processes to change their FS/GS base is to call
> + * ARCH_SET_FS/GS prctls and these reflect changes they make in task->thread.
> + * There are, however, additional considerations:
> + *
> + * There is X86_BUG_NULL_SEG: on some CPUs writing '0' to FS/GS selectors zeroes
> + * the base and on some it doesn't, we need to check for that
> + * (see save_base_legacy()).
> + *
> + * When FSGSBASE extensions are enabled userspace processes will be able to
> + * change their FS/GS bases without kernel intervention. save_fsgs() will
> + * have to be updated to actually read FS and GS bases with RD[FG,GS]BASE
> + * instructions.
> + */
> +void save_current_fsgs(void)
> +{
> +	save_fsgs(current);
> +}
> +EXPORT_SYMBOL_GPL(save_current_fsgs);

We don't really need save_fsgs in KVM though.  We already do the
savesegment ourselves, and we know Intel CPUs don't have
X86_BUG_NULL_SEG.  So this:

        savesegment(fs, vmx->host_state.fs_sel);
        if (!(vmx->host_state.fs_sel & 7)) {
                vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
                vmx->host_state.fs_reload_needed = 0;
        } else {
                vmcs_write16(HOST_FS_SELECTOR, 0);
                vmx->host_state.fs_reload_needed = 1;
        }
        savesegment(gs, vmx->host_state.gs_sel);
	...

could probably become simply:

        savesegment(fs, vmx->host_state.fs_sel);
	/*
	 * When FSGSBASE extensions are enabled, this will have to use
	 * RD{FS,GS}BASE instead of accessing current, and the
	 * corresponding WR{FS,GS}BASE should be done unconditionally,
	 * even if fs_reload_needed (resp. gs_ldt_reload_needed) is 1.
	 */
        if (vmx->host_state.fs_sel <= 3) {
                vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
                vmcs_write16(HOST_FS_BASE, current->thread.fsbase);
                vmx->host_state.fs_reload_needed = 0;
        } else {
                vmcs_write16(HOST_FS_SELECTOR, 0);
                vmcs_write16(HOST_FS_BASE, 0);
                vmx->host_state.fs_reload_needed = 1;
        }
        savesegment(gs, vmx->host_state.gs_sel);
	...

?

Paolo
Andy Lutomirski March 12, 2018, 4:13 p.m. UTC | #4
On Mon, Mar 12, 2018 at 4:03 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 12/03/2018 15:02, Vitaly Kuznetsov wrote:
>>
>> +/*
>> + * Currently, the only way for processes to change their FS/GS base is to call
>> + * ARCH_SET_FS/GS prctls and these reflect changes they make in task->thread.
>> + * There are, however, additional considerations:
>> + *
>> + * There is X86_BUG_NULL_SEG: on some CPUs writing '0' to FS/GS selectors zeroes
>> + * the base and on some it doesn't, we need to check for that
>> + * (see save_base_legacy()).
>> + *
>> + * When FSGSBASE extensions are enabled userspace processes will be able to
>> + * change their FS/GS bases without kernel intervention. save_fsgs() will
>> + * have to be updated to actually read FS and GS bases with RD[FG,GS]BASE
>> + * instructions.
>> + */
>> +void save_current_fsgs(void)
>> +{
>> +     save_fsgs(current);
>> +}
>> +EXPORT_SYMBOL_GPL(save_current_fsgs);
>
> We don't really need save_fsgs in KVM though.  We already do the
> savesegment ourselves, and we know Intel CPUs don't have
> X86_BUG_NULL_SEG.  So this:
>
>         savesegment(fs, vmx->host_state.fs_sel);
>         if (!(vmx->host_state.fs_sel & 7)) {
>                 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
>                 vmx->host_state.fs_reload_needed = 0;
>         } else {
>                 vmcs_write16(HOST_FS_SELECTOR, 0);
>                 vmx->host_state.fs_reload_needed = 1;
>         }
>         savesegment(gs, vmx->host_state.gs_sel);
>         ...
>
> could probably become simply:
>
>         savesegment(fs, vmx->host_state.fs_sel);
>         /*
>          * When FSGSBASE extensions are enabled, this will have to use
>          * RD{FS,GS}BASE instead of accessing current, and the
>          * corresponding WR{FS,GS}BASE should be done unconditionally,
>          * even if fs_reload_needed (resp. gs_ldt_reload_needed) is 1.
>          */
>         if (vmx->host_state.fs_sel <= 3) {
>                 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
>                 vmcs_write16(HOST_FS_BASE, current->thread.fsbase);
>                 vmx->host_state.fs_reload_needed = 0;
>         } else {
>                 vmcs_write16(HOST_FS_SELECTOR, 0);
>                 vmcs_write16(HOST_FS_BASE, 0);
>                 vmx->host_state.fs_reload_needed = 1;
>         }
>         savesegment(gs, vmx->host_state.gs_sel);
>         ...
>
> ?
>

Hmm, probably, although this still gets the case where the user writes
0 to %fs wrong.  Of course, save_fsgs() also gets that wrong.

I'm okay with this variant as long as you add a comment to
save_..._legacy pointing at the KVM code.
Paolo Bonzini March 12, 2018, 4:18 p.m. UTC | #5
On 12/03/2018 17:13, Andy Lutomirski wrote:
>>
>>         savesegment(fs, vmx->host_state.fs_sel);
>>         /*
>>          * When FSGSBASE extensions are enabled, this will have to use
>>          * RD{FS,GS}BASE instead of accessing current, and the
>>          * corresponding WR{FS,GS}BASE should be done unconditionally,
>>          * even if fs_reload_needed (resp. gs_ldt_reload_needed) is 1.
>>          */
>>         if (vmx->host_state.fs_sel <= 3) {
>>                 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
>>                 vmcs_write16(HOST_FS_BASE, current->thread.fsbase);
>>                 vmx->host_state.fs_reload_needed = 0;
>>         } else {
>>                 vmcs_write16(HOST_FS_SELECTOR, 0);
>>                 vmcs_write16(HOST_FS_BASE, 0);
>>                 vmx->host_state.fs_reload_needed = 1;
>>         }
>>         savesegment(gs, vmx->host_state.gs_sel);
>>         ...
>>
>> ?
>>
> Hmm, probably, although this still gets the case where the user writes
> 0 to %fs wrong.  Of course, save_fsgs() also gets that wrong.
> 
> I'm okay with this variant as long as you add a comment to
> save_..._legacy pointing at the KVM code.

Why in save_..._legacy?  If it is about FSGSBASE, shouldn't it be in
save_fsgs?  (Or if not I'm missing what the comment should be about).

Paolo
Andy Lutomirski March 12, 2018, 5 p.m. UTC | #6
On Mon, Mar 12, 2018 at 4:18 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 12/03/2018 17:13, Andy Lutomirski wrote:
>>>
>>>         savesegment(fs, vmx->host_state.fs_sel);
>>>         /*
>>>          * When FSGSBASE extensions are enabled, this will have to use
>>>          * RD{FS,GS}BASE instead of accessing current, and the
>>>          * corresponding WR{FS,GS}BASE should be done unconditionally,
>>>          * even if fs_reload_needed (resp. gs_ldt_reload_needed) is 1.
>>>          */
>>>         if (vmx->host_state.fs_sel <= 3) {
>>>                 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
>>>                 vmcs_write16(HOST_FS_BASE, current->thread.fsbase);
>>>                 vmx->host_state.fs_reload_needed = 0;
>>>         } else {
>>>                 vmcs_write16(HOST_FS_SELECTOR, 0);
>>>                 vmcs_write16(HOST_FS_BASE, 0);
>>>                 vmx->host_state.fs_reload_needed = 1;
>>>         }
>>>         savesegment(gs, vmx->host_state.gs_sel);
>>>         ...
>>>
>>> ?
>>>
>> Hmm, probably, although this still gets the case where the user writes
>> 0 to %fs wrong.  Of course, save_fsgs() also gets that wrong.
>>
>> I'm okay with this variant as long as you add a comment to
>> save_..._legacy pointing at the KVM code.
>
> Why in save_..._legacy?  If it is about FSGSBASE, shouldn't it be in
> save_fsgs?  (Or if not I'm missing what the comment should be about).

It could be in save_fsgs(), I guess.  The main point is to make it
clear to readers of the code in save_fsgs(), the legacy helpers, etc
that there's another piece of code in KVM that makes the same set of
somewhat problematic assumptions and that will need updating for
FSGSBASE.

I'm moderately confident that someone from Intel is currently working
on FSGSBASE, but all I've seen lately is a bunch of kbuild bot errors
:(

--Amdy
Paolo Bonzini March 12, 2018, 5:11 p.m. UTC | #7
On 12/03/2018 18:00, Andy Lutomirski wrote:
>> Why in save_..._legacy?  If it is about FSGSBASE, shouldn't it be in
>> save_fsgs?  (Or if not I'm missing what the comment should be about).
> It could be in save_fsgs(), I guess.  The main point is to make it
> clear to readers of the code in save_fsgs(), the legacy helpers, etc
> that there's another piece of code in KVM that makes the same set of
> somewhat problematic assumptions and that will need updating for
> FSGSBASE.

Okay, that's a good idea.

Paolo
Vitaly Kuznetsov March 13, 2018, 1:20 p.m. UTC | #8
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 12/03/2018 15:02, Vitaly Kuznetsov wrote:
>>  
>> +/*
>> + * Currently, the only way for processes to change their FS/GS base is to call
>> + * ARCH_SET_FS/GS prctls and these reflect changes they make in task->thread.
>> + * There are, however, additional considerations:
>> + *
>> + * There is X86_BUG_NULL_SEG: on some CPUs writing '0' to FS/GS selectors zeroes
>> + * the base and on some it doesn't, we need to check for that
>> + * (see save_base_legacy()).
>> + *
>> + * When FSGSBASE extensions are enabled userspace processes will be able to
>> + * change their FS/GS bases without kernel intervention. save_fsgs() will
>> + * have to be updated to actually read FS and GS bases with RD[FG,GS]BASE
>> + * instructions.
>> + */
>> +void save_current_fsgs(void)
>> +{
>> +	save_fsgs(current);
>> +}
>> +EXPORT_SYMBOL_GPL(save_current_fsgs);
>
> We don't really need save_fsgs in KVM though.  We already do the
> savesegment ourselves, and we know Intel CPUs don't have
> X86_BUG_NULL_SEG.  So this:
>
>         savesegment(fs, vmx->host_state.fs_sel);
>         if (!(vmx->host_state.fs_sel & 7)) {
>                 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
>                 vmx->host_state.fs_reload_needed = 0;
>         } else {
>                 vmcs_write16(HOST_FS_SELECTOR, 0);
>                 vmx->host_state.fs_reload_needed = 1;
>         }
>         savesegment(gs, vmx->host_state.gs_sel);
> 	...
>
> could probably become simply:
>
>         savesegment(fs, vmx->host_state.fs_sel);
> 	/*
> 	 * When FSGSBASE extensions are enabled, this will have to use
> 	 * RD{FS,GS}BASE instead of accessing current, and the
> 	 * corresponding WR{FS,GS}BASE should be done unconditionally,
> 	 * even if fs_reload_needed (resp. gs_ldt_reload_needed) is 1.
> 	 */
>         if (vmx->host_state.fs_sel <= 3) {
>                 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
>                 vmcs_write16(HOST_FS_BASE, current->thread.fsbase);

vmcs_writel() I guess ... and, just to make sure I follow your
suggestion, this is for x86_64 only, right? x86_32 does

vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));

and I think it needs to stay.

(personally, I'm rather for exporting save_fsgs(), dropping
savesegment() and getting all we need from current to avoid propagating
assumptions but I'm flexible)
Paolo Bonzini March 13, 2018, 1:24 p.m. UTC | #9
On 13/03/2018 14:20, Vitaly Kuznetsov wrote:
> vmcs_writel() I guess ... and, just to make sure I follow your
> suggestion, this is for x86_64 only, right? x86_32 does
> 
> vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
> 
> and I think it needs to stay.

Yes.

> (personally, I'm rather for exporting save_fsgs(), dropping
> savesegment() and getting all we need from current to avoid propagating
> assumptions but I'm flexible)

Yes, that's fine too, as long as it's not mix and match. :)

Paolo
diff mbox

Patch

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b0ccd4847a58..006352b85ba3 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -410,6 +410,9 @@  DECLARE_INIT_PER_CPU(irq_stack_union);
 DECLARE_PER_CPU(char *, irq_stack_ptr);
 DECLARE_PER_CPU(unsigned int, irq_count);
 extern asmlinkage void ignore_sysret(void);
+
+/* Save actual FS/GS selectors and bases to current->thread */
+void save_current_fsgs(void);
 #else	/* X86_64 */
 #ifdef CONFIG_CC_STACKPROTECTOR
 /*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9eb448c7859d..eb907fefe02e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -205,6 +205,26 @@  static __always_inline void save_fsgs(struct task_struct *task)
 	save_base_legacy(task, task->thread.gsindex, GS);
 }
 
+/*
+ * Currently, the only way for processes to change their FS/GS base is to call
+ * ARCH_SET_FS/GS prctls and these reflect changes they make in task->thread.
+ * There are, however, additional considerations:
+ *
+ * There is X86_BUG_NULL_SEG: on some CPUs writing '0' to FS/GS selectors zeroes
+ * the base and on some it doesn't, we need to check for that
+ * (see save_base_legacy()).
+ *
+ * When FSGSBASE extensions are enabled userspace processes will be able to
+ * change their FS/GS bases without kernel intervention. save_fsgs() will
+ * have to be updated to actually read FS and GS bases with RD[FG,GS]BASE
+ * instructions.
+ */
+void save_current_fsgs(void)
+{
+	save_fsgs(current);
+}
+EXPORT_SYMBOL_GPL(save_current_fsgs);
+
 static __always_inline void loadseg(enum which_selector which,
 				    unsigned short sel)
 {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 051dab74e4e9..e46b7b24ebae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2157,7 +2157,9 @@  static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #endif
 
 #ifdef CONFIG_X86_64
-	vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
+	/* Synchronize FS and GS bases to current->thread first */
+	save_current_fsgs();
+	vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
 	vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
 #else
 	vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));