diff mbox series

[v2,10/25] KVM: VMX: Add support for FRED context save/restore

Message ID 20240207172646.3981-11-xin3.li@intel.com (mailing list archive)
State New
Headers show
Series Enable FRED with KVM VMX | expand

Commit Message

Li, Xin3 Feb. 7, 2024, 5:26 p.m. UTC
Handle host initiated FRED MSR access requests to allow FRED context
to be set/get from user level.

During VM save/restore and live migration, FRED context needs to be
saved/restored, which requires FRED MSRs to be accessed from a user
level application, e.g., Qemu.

Note, handling of MSR_IA32_FRED_SSP0, i.e., MSR_IA32_PL0_SSP, is not
added yet, which is done in the KVM CET patch set.

Signed-off-by: Xin Li <xin3.li@intel.com>
Tested-by: Shan Kang <shan.kang@intel.com>
---

Changes since v1:
* Use kvm_cpu_cap_has() instead of cpu_feature_enabled() (Chao Gao).
* Fail host requested FRED MSRs access if KVM cannot virtualize FRED
  (Chao Gao).
* Handle the case FRED MSRs are valid but KVM cannot virtualize FRED
  (Chao Gao).
* Add sanity checks when writing to FRED MSRs.
---
 arch/x86/kvm/vmx/vmx.c | 72 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c     | 47 +++++++++++++++++++++++++++
 2 files changed, 119 insertions(+)

Comments

Chao Gao April 29, 2024, 6:31 a.m. UTC | #1
On Thu, Feb 08, 2024 at 01:26:30AM +0800, Xin Li wrote:
>Handle host initiated FRED MSR access requests to allow FRED context
>to be set/get from user level.
>

The changelog isn't accurate because guest accesses are also handled
by this patch, specifically in the "else" branch.

	>+		if (host_initiated) {
	>+			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
	>+				return 1;
	>+		} else {



> void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
>@@ -2019,6 +2037,33 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> 	case MSR_KERNEL_GS_BASE:
> 		msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
> 		break;
>+	case MSR_IA32_FRED_RSP0:
>+		msr_info->data = vmx_read_guest_fred_rsp0(vmx);
>+		break;
>+	case MSR_IA32_FRED_RSP1:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP1);
>+		break;
>+	case MSR_IA32_FRED_RSP2:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP2);
>+		break;
>+	case MSR_IA32_FRED_RSP3:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP3);
>+		break;
>+	case MSR_IA32_FRED_STKLVLS:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_STKLVLS);
>+		break;
>+	case MSR_IA32_FRED_SSP1:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP1);
>+		break;
>+	case MSR_IA32_FRED_SSP2:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP2);
>+		break;
>+	case MSR_IA32_FRED_SSP3:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP3);
>+		break;
>+	case MSR_IA32_FRED_CONFIG:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_CONFIG);
>+		break;

how about adding a helper function to convert MSR index to the VMCS field id?
Then do:

	case MSR_IA32_FRED_RSP1 ... MSR_IA32_FRED_STKLVLS:
	case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_CONFIG:
		msr_info->data = vmcs_read64(msr_to_vmcs(index));
		break;

and ...

> #endif
> 	case MSR_EFER:
> 		return kvm_get_msr_common(vcpu, msr_info);
>@@ -2226,6 +2271,33 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> 			vmx_update_exception_bitmap(vcpu);
> 		}
> 		break;
>+	case MSR_IA32_FRED_RSP0:
>+		vmx_write_guest_fred_rsp0(vmx, data);
>+		break;
>+	case MSR_IA32_FRED_RSP1:
>+		vmcs_write64(GUEST_IA32_FRED_RSP1, data);
>+		break;
>+	case MSR_IA32_FRED_RSP2:
>+		vmcs_write64(GUEST_IA32_FRED_RSP2, data);
>+		break;
>+	case MSR_IA32_FRED_RSP3:
>+		vmcs_write64(GUEST_IA32_FRED_RSP3, data);
>+		break;
>+	case MSR_IA32_FRED_STKLVLS:
>+		vmcs_write64(GUEST_IA32_FRED_STKLVLS, data);
>+		break;
>+	case MSR_IA32_FRED_SSP1:
>+		vmcs_write64(GUEST_IA32_FRED_SSP1, data);
>+		break;
>+	case MSR_IA32_FRED_SSP2:
>+		vmcs_write64(GUEST_IA32_FRED_SSP2, data);
>+		break;
>+	case MSR_IA32_FRED_SSP3:
>+		vmcs_write64(GUEST_IA32_FRED_SSP3, data);
>+		break;
>+	case MSR_IA32_FRED_CONFIG:
>+		vmcs_write64(GUEST_IA32_FRED_CONFIG, data);
>+		break;

	case MSR_IA32_FRED_RSP1 ... MSR_IA32_FRED_STKLVLS:
	case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_CONFIG:
		vmcs_write64(msr_to_vmcs(index), data);
		break;

The code will be more compact and generate less instructions.  I believe CET
series can do the same change [*]. Performance here isn't critical. I just
think it looks cumbersome to repeat the same pattern for 8 (and more with
CET considered) MSRs.

[*]: https://lore.kernel.org/kvm/20240219074733.122080-21-weijiang.yang@intel.com/

> #endif
> 	case MSR_IA32_SYSENTER_CS:
> 		if (is_guest_mode(vcpu))
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index 363b1c080205..4e8d60f248e3 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -1451,6 +1451,9 @@ static const u32 msrs_to_save_base[] = {
> 	MSR_STAR,
> #ifdef CONFIG_X86_64
> 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
>+	MSR_IA32_FRED_RSP0, MSR_IA32_FRED_RSP1, MSR_IA32_FRED_RSP2,
>+	MSR_IA32_FRED_RSP3, MSR_IA32_FRED_STKLVLS, MSR_IA32_FRED_SSP1,
>+	MSR_IA32_FRED_SSP2, MSR_IA32_FRED_SSP3, MSR_IA32_FRED_CONFIG,
> #endif
> 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
> 	MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
>@@ -1892,6 +1895,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
> 			return 1;
> 
> 		data = (u32)data;
>+		break;
>+	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
>+		if (index != MSR_IA32_FRED_STKLVLS && is_noncanonical_address(data, vcpu))
>+			return 1;
>+		if ((index >= MSR_IA32_FRED_RSP0 && index <= MSR_IA32_FRED_RSP3) &&
>+		    (data & GENMASK_ULL(5, 0)))
>+			return 1;
>+		if ((index >= MSR_IA32_FRED_SSP1 && index <= MSR_IA32_FRED_SSP3) &&
>+		    (data & GENMASK_ULL(2, 0)))
>+			return 1;
>+
>+		if (host_initiated) {
>+			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
>+				return 1;

Should be:
			if (!kvm_cpu_cap_has(X86_FEATURE_FRED) && data)

KVM ABI allows userspace to write only 0 if guests cannot enumerate the
feature. And even better, your next version can be on top of Sean's series

https://lore.kernel.org/kvm/20240425181422.3250947-1-seanjc@google.com/T/#md00be687770e1e658fc9fe0eac20b5f0bd230e4c

this way, you can get rid of the "host_initiated" check.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 264378c3b784..ee61d2c25cb0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1420,6 +1420,24 @@  static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
 	preempt_enable();
 	vmx->msr_guest_kernel_gs_base = data;
 }
+
+static u64 vmx_read_guest_fred_rsp0(struct vcpu_vmx *vmx)
+{
+	preempt_disable();
+	if (vmx->guest_state_loaded)
+		vmx->msr_guest_fred_rsp0 = read_msr(MSR_IA32_FRED_RSP0);
+	preempt_enable();
+	return vmx->msr_guest_fred_rsp0;
+}
+
+static void vmx_write_guest_fred_rsp0(struct vcpu_vmx *vmx, u64 data)
+{
+	preempt_disable();
+	if (vmx->guest_state_loaded)
+		wrmsrl(MSR_IA32_FRED_RSP0, data);
+	preempt_enable();
+	vmx->msr_guest_fred_rsp0 = data;
+}
 #endif
 
 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
@@ -2019,6 +2037,33 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_KERNEL_GS_BASE:
 		msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
 		break;
+	case MSR_IA32_FRED_RSP0:
+		msr_info->data = vmx_read_guest_fred_rsp0(vmx);
+		break;
+	case MSR_IA32_FRED_RSP1:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP1);
+		break;
+	case MSR_IA32_FRED_RSP2:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP2);
+		break;
+	case MSR_IA32_FRED_RSP3:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP3);
+		break;
+	case MSR_IA32_FRED_STKLVLS:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_STKLVLS);
+		break;
+	case MSR_IA32_FRED_SSP1:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP1);
+		break;
+	case MSR_IA32_FRED_SSP2:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP2);
+		break;
+	case MSR_IA32_FRED_SSP3:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP3);
+		break;
+	case MSR_IA32_FRED_CONFIG:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_CONFIG);
+		break;
 #endif
 	case MSR_EFER:
 		return kvm_get_msr_common(vcpu, msr_info);
@@ -2226,6 +2271,33 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			vmx_update_exception_bitmap(vcpu);
 		}
 		break;
+	case MSR_IA32_FRED_RSP0:
+		vmx_write_guest_fred_rsp0(vmx, data);
+		break;
+	case MSR_IA32_FRED_RSP1:
+		vmcs_write64(GUEST_IA32_FRED_RSP1, data);
+		break;
+	case MSR_IA32_FRED_RSP2:
+		vmcs_write64(GUEST_IA32_FRED_RSP2, data);
+		break;
+	case MSR_IA32_FRED_RSP3:
+		vmcs_write64(GUEST_IA32_FRED_RSP3, data);
+		break;
+	case MSR_IA32_FRED_STKLVLS:
+		vmcs_write64(GUEST_IA32_FRED_STKLVLS, data);
+		break;
+	case MSR_IA32_FRED_SSP1:
+		vmcs_write64(GUEST_IA32_FRED_SSP1, data);
+		break;
+	case MSR_IA32_FRED_SSP2:
+		vmcs_write64(GUEST_IA32_FRED_SSP2, data);
+		break;
+	case MSR_IA32_FRED_SSP3:
+		vmcs_write64(GUEST_IA32_FRED_SSP3, data);
+		break;
+	case MSR_IA32_FRED_CONFIG:
+		vmcs_write64(GUEST_IA32_FRED_CONFIG, data);
+		break;
 #endif
 	case MSR_IA32_SYSENTER_CS:
 		if (is_guest_mode(vcpu))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 363b1c080205..4e8d60f248e3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1451,6 +1451,9 @@  static const u32 msrs_to_save_base[] = {
 	MSR_STAR,
 #ifdef CONFIG_X86_64
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
+	MSR_IA32_FRED_RSP0, MSR_IA32_FRED_RSP1, MSR_IA32_FRED_RSP2,
+	MSR_IA32_FRED_RSP3, MSR_IA32_FRED_STKLVLS, MSR_IA32_FRED_SSP1,
+	MSR_IA32_FRED_SSP2, MSR_IA32_FRED_SSP3, MSR_IA32_FRED_CONFIG,
 #endif
 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
 	MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
@@ -1892,6 +1895,30 @@  static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 			return 1;
 
 		data = (u32)data;
+		break;
+	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
+		if (index != MSR_IA32_FRED_STKLVLS && is_noncanonical_address(data, vcpu))
+			return 1;
+		if ((index >= MSR_IA32_FRED_RSP0 && index <= MSR_IA32_FRED_RSP3) &&
+		    (data & GENMASK_ULL(5, 0)))
+			return 1;
+		if ((index >= MSR_IA32_FRED_SSP1 && index <= MSR_IA32_FRED_SSP3) &&
+		    (data & GENMASK_ULL(2, 0)))
+			return 1;
+
+		if (host_initiated) {
+			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
+				return 1;
+		} else {
+			/*
+			 * Inject #GP upon FRED MSRs accesses from a non-FRED guest,
+			 * which also ensures no malicious guest can write to FRED
+			 * MSRs to corrupt host FRED MSRs.
+			 */
+			if (!guest_can_use(vcpu, X86_FEATURE_FRED))
+				return 1;
+		}
+
 		break;
 	}
 
@@ -1936,6 +1963,22 @@  int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		break;
+	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
+		if (host_initiated) {
+			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
+				return 1;
+		} else {
+			/*
+			 * Inject #GP upon FRED MSRs accesses from a non-FRED guest,
+			 * which also ensures no malicious guest can write to FRED
+			 * MSRs to corrupt host FRED MSRs.
+			 */
+			if (!guest_can_use(vcpu, X86_FEATURE_FRED))
+				return 1;
+		}
+
+		break;
+
 	}
 
 	msr.index = index;
@@ -7364,6 +7407,10 @@  static void kvm_probe_msr_to_save(u32 msr_index)
 		if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR))
 			return;
 		break;
+	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
+		if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
+			return;
+		break;
 	default:
 		break;
 	}