[01/15] KVM: VMX: Use x86 core API to access fs_base and inactive gs_base

Message ID: 20211118110814.2568-2-jiangshanlai@gmail.com
State: New, archived
Series: KVM: X86: Miscellaneous cleanups

Commit Message

Lai Jiangshan Nov. 18, 2021, 11:08 a.m. UTC
From: Lai Jiangshan <laijs@linux.alibaba.com>

And they use FSGSBASE instructions when enabled.

Cc: x86@kernel.org
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/include/asm/kvm_host.h | 10 ----------
 arch/x86/kernel/process_64.c    |  2 ++
 arch/x86/kvm/vmx/vmx.c          | 14 +++++++-------
 3 files changed, 9 insertions(+), 17 deletions(-)

Comments

Paolo Bonzini Nov. 18, 2021, 4:05 p.m. UTC | #1
On 11/18/21 12:08, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
> 
> And they use FSGSBASE instructions when enabled.
> 
> Cc: x86@kernel.org
> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>

This needs an ACK from the x86 maintainers.

I queued 2-4 and 6-14.

Paolo

Thomas Gleixner Nov. 21, 2021, 3:17 p.m. UTC | #2
Lai,

On Thu, Nov 18 2021 at 19:08, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
>
> And they use FSGSBASE instructions when enabled.

That's really not a proper explanation for adding yet more exports.

Thanks,

        tglx
Lai Jiangshan Nov. 22, 2021, 3:27 a.m. UTC | #3
On 2021/11/21 23:17, Thomas Gleixner wrote:
> Lai,
> 
> On Thu, Nov 18 2021 at 19:08, Lai Jiangshan wrote:
>> From: Lai Jiangshan <laijs@linux.alibaba.com>
>>
>> And they use FSGSBASE instructions when enabled.
> 
> That's really not a proper explanation for adding yet more exports.
> 

Hello

---
When a vCPU thread is rescheduled, one rdmsr and two wrmsr operations are
performed on MSR_KERNEL_GS_BASE.

In the scheduler, the core kernel uses x86_gsbase_[read|write]_cpu_inactive()
to accelerate access to the inactive GSBASE, but when the scheduler calls
into KVM's preemption notifier, {rd|wr}msr(MSR_KERNEL_GS_BASE) is used
instead.

To make the way KVM accesses the inactive GSBASE consistent with the
scheduler, change KVM to use the x86 core API to access fs_base and the
inactive gs_base.  These helpers use the FSGSBASE instructions when they
are enabled.

This adds two more exports, but it does not expose any extra software or
hardware resources, since the same state is already accessible via
{rd|wr}msr.
---

Not very persuasive, I know.  If the preemption-notifier path needs to be
accelerated, there are other, more aggressive approaches.

Thanks
Lai
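
For readers following along, here is a minimal sketch of what the read
side of the inactive-GSBASE API does, assuming the FSGSBASE handling
follows the usual swapgs + rdgsbase pattern; the legacy branch matches
the rdmsrl()/wrmsrl() fallback visible in the patch below, and the
function name here is illustrative rather than the kernel's:

	static unsigned long gsbase_inactive_read_sketch(void)
	{
		unsigned long gsbase;

		if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
			unsigned long flags;

			/*
			 * SWAPGS makes the inactive GSBASE the active
			 * one, RDGSBASE reads it without touching an
			 * MSR, and a second SWAPGS restores the
			 * original state.  IRQs stay off so nothing
			 * can observe the swapped bases.
			 */
			local_irq_save(flags);
			native_swapgs();
			gsbase = rdgsbase();
			native_swapgs();
			local_irq_restore(flags);
		} else {
			/* Legacy path: a plain MSR read. */
			rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
		}

		return gsbase;
	}

The FSGSBASE branch is what makes the core API cheaper than raw
{rd|wr}msr on capable CPUs, which is the acceleration the proposed
commit message above refers to.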

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1fcb345bc107..4cbb402f5636 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1808,16 +1808,6 @@ static inline void kvm_load_ldt(u16 sel)
 	asm("lldt %0" : : "rm"(sel));
 }
 
-#ifdef CONFIG_X86_64
-static inline unsigned long read_msr(unsigned long msr)
-{
-	u64 value;
-
-	rdmsrl(msr, value);
-	return value;
-}
-#endif
-
 static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 {
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3402edec236c..296bd5c2e38b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -443,6 +443,7 @@ unsigned long x86_gsbase_read_cpu_inactive(void)
 
 	return gsbase;
 }
+EXPORT_SYMBOL_GPL(x86_gsbase_read_cpu_inactive);
 
 void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
 {
@@ -456,6 +457,7 @@ void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
 		wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
 	}
 }
+EXPORT_SYMBOL_GPL(x86_gsbase_write_cpu_inactive);
 
 unsigned long x86_fsbase_read_task(struct task_struct *task)
 {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3127c66a1651..48a34d1a2989 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1156,11 +1156,11 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	} else {
 		savesegment(fs, fs_sel);
 		savesegment(gs, gs_sel);
-		fs_base = read_msr(MSR_FS_BASE);
-		vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
+		fs_base = x86_fsbase_read_cpu();
+		vmx->msr_host_kernel_gs_base = x86_gsbase_read_cpu_inactive();
 	}
 
-	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+	x86_gsbase_write_cpu_inactive(vmx->msr_guest_kernel_gs_base);
 #else
 	savesegment(fs, fs_sel);
 	savesegment(gs, gs_sel);
@@ -1184,7 +1184,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
 	++vmx->vcpu.stat.host_state_reload;
 
 #ifdef CONFIG_X86_64
-	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+	vmx->msr_guest_kernel_gs_base = x86_gsbase_read_cpu_inactive();
 #endif
 	if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
 		kvm_load_ldt(host_state->ldt_sel);
@@ -1204,7 +1204,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
 #endif
 	invalidate_tss_limit();
 #ifdef CONFIG_X86_64
-	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	x86_gsbase_write_cpu_inactive(vmx->msr_host_kernel_gs_base);
 #endif
 	load_fixmap_gdt(raw_smp_processor_id());
 	vmx->guest_state_loaded = false;
@@ -1216,7 +1216,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
 {
 	preempt_disable();
 	if (vmx->guest_state_loaded)
-		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+		vmx->msr_guest_kernel_gs_base = x86_gsbase_read_cpu_inactive();
 	preempt_enable();
 	return vmx->msr_guest_kernel_gs_base;
 }
@@ -1225,7 +1225,7 @@ static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
 {
 	preempt_disable();
 	if (vmx->guest_state_loaded)
-		wrmsrl(MSR_KERNEL_GS_BASE, data);
+		x86_gsbase_write_cpu_inactive(data);
 	preempt_enable();
 	vmx->msr_guest_kernel_gs_base = data;
 }
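
A closing note on the last two hunks: the preempt_disable() pairs keep
the guest_state_loaded check and the base access atomic with respect to
KVM's preemption notifier.  Below is an annotated copy of the read-side
function; the comments are one reading of the surrounding code, not
part of the patch:

	static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
	{
		preempt_disable();
		/*
		 * While guest_state_loaded, the guest's value lives in
		 * the CPU's inactive GSBASE.  Without preemption
		 * disabled, a reschedule at this point would run
		 * vmx_prepare_switch_to_host(), which saves that value
		 * into msr_guest_kernel_gs_base, clears
		 * guest_state_loaded and restores the host's base, so
		 * the read below would return host state instead.
		 */
		if (vmx->guest_state_loaded)
			vmx->msr_guest_kernel_gs_base = x86_gsbase_read_cpu_inactive();
		preempt_enable();
		return vmx->msr_guest_kernel_gs_base;
	}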