@@ -65,6 +65,30 @@ static int vt_mem_enc_op(struct kvm *kvm, void __user *argp)
return tdx_vm_ioctl(kvm, argp);
}
+/*
+ * Create the vendor-specific part of @vcpu: a TD vCPU is handed to the TDX
+ * backend, any other vCPU takes the regular VMX path.
+ *
+ * Returns the result of whichever backend was selected.
+ */
+static int vt_vcpu_create(struct kvm_vcpu *vcpu)
+{
+	return is_td_vcpu(vcpu) ? tdx_vcpu_create(vcpu) : vmx_vcpu_create(vcpu);
+}
+
+/*
+ * Release the vendor-specific part of @vcpu through the TDX backend for a TD
+ * vCPU, and through VMX otherwise.
+ */
+static void vt_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Call the void helpers as plain statements: ISO C does not allow
+	 * "return expr;" in a function returning void, even when expr itself
+	 * has type void.
+	 */
+	if (is_td_vcpu(vcpu)) {
+		tdx_vcpu_free(vcpu);
+		return;
+	}
+
+	vmx_vcpu_free(vcpu);
+}
+
+/*
+ * Reset @vcpu through the TDX backend for a TD vCPU, and through VMX
+ * otherwise. @init_event is forwarded unchanged to the selected backend.
+ */
+static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+{
+	/*
+	 * Call the void helpers as plain statements: ISO C does not allow
+	 * "return expr;" in a function returning void, even when expr itself
+	 * has type void.
+	 */
+	if (is_td_vcpu(vcpu)) {
+		tdx_vcpu_reset(vcpu, init_event);
+		return;
+	}
+
+	vmx_vcpu_reset(vcpu, init_event);
+}
+
struct kvm_x86_ops vt_x86_ops __initdata = {
.name = "kvm_intel",
@@ -81,9 +105,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.mmu_prezap = vt_mmu_prezap,
.vm_free = vt_vm_free,
- .vcpu_create = vmx_vcpu_create,
- .vcpu_free = vmx_vcpu_free,
- .vcpu_reset = vmx_vcpu_reset,
+ .vcpu_create = vt_vcpu_create,
+ .vcpu_free = vt_vcpu_free,
+ .vcpu_reset = vt_vcpu_reset,
.prepare_guest_switch = vmx_prepare_switch_to_guest,
.vcpu_load = vmx_vcpu_load,
@@ -6,6 +6,7 @@
#include "capabilities.h"
#include "x86_ops.h"
#include "tdx.h"
+#include "x86.h"
#undef pr_fmt
#define pr_fmt(fmt) "tdx: " fmt
@@ -51,6 +52,11 @@ static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid)
return pa;
}
+/* Report the "added" flag of the vCPU's TDVPR page. */
+static inline bool is_td_vcpu_created(struct vcpu_tdx *tdx)
+{
+	bool tdvpr_added = tdx->tdvpr.added;
+
+	return tdvpr_added;
+}
+
static inline bool is_td_created(struct kvm_tdx *kvm_tdx)
{
return kvm_tdx->tdr.added;
@@ -349,6 +355,142 @@ int tdx_vm_init(struct kvm *kvm)
return ret;
}
+/*
+ * Allocate the pages backing a TD vCPU (one TDVPR page plus
+ * tdx_caps.tdvpx_nr_pages TDVPX pages) and seed the vCPU state that KVM
+ * tracks on the host side.
+ *
+ * Nothing is handed to the TDX module here; tdx_vcpu_reset() is the function
+ * that issues tdh_vp_create()/tdh_vp_addcx() on these pages.
+ *
+ * Returns 0 on success. On failure every page allocated so far is freed and
+ * the error from tdx_alloc_td_page(), or -ENOMEM, is returned.
+ */
+int tdx_vcpu_create(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+	struct kvm_tdx *kvm_tdx;
+	int nr_done, ret;
+
+	ret = tdx_alloc_td_page(&tdx->tdvpr);
+	if (ret)
+		return ret;
+
+	tdx->tdvpx = kcalloc(tdx_caps.tdvpx_nr_pages, sizeof(*tdx->tdvpx),
+			     GFP_KERNEL_ACCOUNT);
+	if (!tdx->tdvpx) {
+		ret = -ENOMEM;
+		goto free_tdvpr;
+	}
+
+	for (nr_done = 0; nr_done < tdx_caps.tdvpx_nr_pages; nr_done++) {
+		ret = tdx_alloc_td_page(&tdx->tdvpx[nr_done]);
+		if (ret)
+			goto free_tdvpx;
+	}
+
+	vcpu->arch.efer = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX;
+
+	/* Every CR0/CR4 bit is marked as guest owned. */
+	vcpu->arch.cr0_guest_owned_bits = ~0ul;
+	vcpu->arch.cr4_guest_owned_bits = ~0ul;
+
+	/* The TSC offset and the debug attribute are per-VM values. */
+	kvm_tdx = to_kvm_tdx(vcpu->kvm);
+	vcpu->arch.tsc_offset = kvm_tdx->tsc_offset;
+	vcpu->arch.l1_tsc_offset = vcpu->arch.tsc_offset;
+	vcpu->arch.guest_state_protected =
+		(kvm_tdx->attributes & TDX_TD_ATTRIBUTE_DEBUG) == 0;
+
+	return 0;
+
+free_tdvpx:
+	/* Pages [0, nr_done) were allocated; release them newest first. */
+	while (--nr_done >= 0)
+		free_page(tdx->tdvpx[nr_done].va);
+	kfree(tdx->tdvpx);
+free_tdvpr:
+	free_page(tdx->tdvpr.va);
+
+	return ret;
+}
+
+/*
+ * Give back the pages of a TD vCPU: reclaim each TDVPX page, free the array
+ * that tracks them, then reclaim the TDVPR page.
+ *
+ * Nothing is released while the VM still has an HKID assigned.
+ */
+void tdx_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+	int idx = 0;
+
+	/*
+	 * A still-assigned HKID means teardown failed; in that state the pages
+	 * can be neither reclaimed nor freed, so leave them alone.
+	 */
+	if (is_hkid_assigned(kvm_tdx))
+		return;
+
+	while (idx < tdx_caps.tdvpx_nr_pages) {
+		tdx_reclaim_td_page(&tdx->tdvpx[idx]);
+		idx++;
+	}
+	kfree(tdx->tdvpx);
+
+	tdx_reclaim_td_page(&tdx->tdvpr);
+}
+
+/*
+ * Bring a newly created TD vCPU to its initial state.
+ *
+ * Sets the vCPU's TSC frequency to the per-VM value, hands the TDVPR page to
+ * tdh_vp_create() and each TDVPX page to tdh_vp_addcx(), makes sure a CPUID
+ * entry advertising x2APIC exists, switches the APIC base to x2APIC mode and
+ * marks the vCPU runnable.
+ *
+ * @init_event must be false, since TDX does not support the INIT event.
+ *
+ * The function returns void, so every failure is reported by setting
+ * vcpu->kvm->vm_bugged.
+ */
+void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+	struct msr_data apic_base_msr;
+	u64 err;
+	int i;
+
+	/* TDX doesn't support the INIT event. */
+	if (WARN_ON(init_event))
+		goto td_bugged;
+	/* TDX only supports running with x2APIC enabled. */
+	if (WARN_ON(!vcpu->arch.apic))
+		goto td_bugged;
+	/* The vCPU must not have been created already. */
+	if (WARN_ON(is_td_vcpu_created(tdx)))
+		goto td_bugged;
+
+	/*
+	 * For TDX the TSC frequency is per-VM and is fixed by a parameter of
+	 * tdh_mng_init(). Force that value here, overriding the max_tsc_khz
+	 * that kvm_arch_vcpu_create() set through kvm_set_tsc_khz(); this
+	 * function runs after kvm_arch_vcpu_create() has made that call.
+	 */
+	kvm_set_tsc_khz(vcpu, kvm_tdx->tsc_khz);
+
+	err = tdh_vp_create(kvm_tdx->tdr.pa, tdx->tdvpr.pa);
+	if (WARN_ON_ONCE(err)) {
+		pr_tdx_error(TDH_VP_CREATE, err, NULL);
+		goto td_bugged;
+	}
+	tdx_mark_td_page_added(&tdx->tdvpr);
+
+	for (i = 0; i < tdx_caps.tdvpx_nr_pages; i++) {
+		err = tdh_vp_addcx(tdx->tdvpr.pa, tdx->tdvpx[i].pa);
+		if (WARN_ON_ONCE(err)) {
+			pr_tdx_error(TDH_VP_ADDCX, err, NULL);
+			goto td_bugged;
+		}
+		tdx_mark_td_page_added(&tdx->tdvpx[i]);
+	}
+
+	if (!vcpu->arch.cpuid_entries) {
+		/*
+		 * On vCPU creation the CPUID table is empty. Install a single
+		 * entry with the x2APIC feature bit set so that x2APIC mode
+		 * can be enabled below.
+		 */
+		struct kvm_cpuid_entry2 *e;
+
+		e = kvmalloc_array(1, sizeof(*e), GFP_KERNEL_ACCOUNT);
+		/* The allocation can fail; never write through a NULL entry. */
+		if (!e)
+			goto td_bugged;
+		*e = (struct kvm_cpuid_entry2) {
+			.function = 1,	/* Features for X2APIC */
+			.index = 0,
+			.eax = 0,
+			.ebx = 0,
+			.ecx = 1ULL << 21,	/* X2APIC */
+			.edx = 0,
+		};
+		vcpu->arch.cpuid_entries = e;
+		vcpu->arch.cpuid_nent = 1;
+	}
+	apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC;
+	if (kvm_vcpu_is_reset_bsp(vcpu))
+		apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
+	apic_base_msr.host_initiated = true;
+	if (WARN_ON(kvm_set_apic_base(vcpu, &apic_base_msr)))
+		goto td_bugged;
+
+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+	return;
+
+td_bugged:
+	vcpu->kvm->vm_bugged = true;
+}
+
static int tdx_capabilities(struct kvm *kvm, struct kvm_tdx_cmd *cmd)
{
struct kvm_tdx_capabilities __user *user_caps;
@@ -41,6 +41,7 @@ static inline u64 tdh_mem_sept_add(hpa_t tdr, gpa_t gpa, int level, hpa_t page,
+/*
+ * Issue the TDH_VP_ADDCX SEAMCALL with the page at @addr and the vCPU's
+ * @tdvpr page as operands. @addr is passed to tdx_clflush_page() before the
+ * call is made.
+ *
+ * Returns the value produced by kvm_seamcall().
+ */
static inline u64 tdh_vp_addcx(hpa_t tdvpr, hpa_t addr)
{
+ tdx_clflush_page(addr);
return kvm_seamcall(TDH_VP_ADDCX, addr, tdvpr, 0, 0, 0, NULL);
}
@@ -69,6 +70,7 @@ static inline u64 tdh_mng_create(hpa_t tdr, int hkid)
+/*
+ * Issue the TDH_VP_CREATE SEAMCALL with the @tdvpr page and the VM's @tdr
+ * page as operands. @tdvpr is passed to tdx_clflush_page() before the call
+ * is made.
+ *
+ * Returns the value produced by kvm_seamcall().
+ */
static inline u64 tdh_vp_create(hpa_t tdr, hpa_t tdvpr)
{
+ tdx_clflush_page(tdvpr);
return kvm_seamcall(TDH_VP_CREATE, tdvpr, tdr, 0, 0, 0, NULL);
}
@@ -137,6 +137,10 @@ int tdx_vm_init(struct kvm *kvm);
void tdx_mmu_prezap(struct kvm *kvm);
void tdx_vm_free(struct kvm *kvm);
+/* TD vCPU create/free/reset entry points implemented by the TDX backend. */
+int tdx_vcpu_create(struct kvm_vcpu *vcpu);
+void tdx_vcpu_free(struct kvm_vcpu *vcpu);
+void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
+
int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
#else
static inline void tdx_pre_kvm_init(
@@ -149,6 +153,10 @@ static inline int tdx_vm_init(struct kvm *kvm) { return -EOPNOTSUPP; }
static inline void tdx_mmu_prezap(struct kvm *kvm) {}
static inline void tdx_vm_free(struct kvm *kvm) {}
+/*
+ * Stubs for the #else branch (the controlling #if is not visible here):
+ * creation reports -EOPNOTSUPP, free and reset do nothing.
+ */
+static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
+static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
+static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
+
static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
#endif
@@ -2322,7 +2322,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
return 0;
}
-static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
{
u32 thresh_lo, thresh_hi;
int use_scaling = 0;
@@ -2354,6 +2354,7 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
}
return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
}
+EXPORT_SYMBOL_GPL(kvm_set_tsc_khz);
static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
@@ -303,6 +303,7 @@ extern int pi_inject_timer;
extern bool report_ignored_msrs;
extern unsigned long max_tsc_khz;
+int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz);
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{