@@ -966,6 +966,9 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
+ int (*get_nested)(struct kvm_vcpu *vcpu, struct kvm_nested *nested);
+ int (*set_nested)(struct kvm_vcpu *vcpu, struct kvm_nested *nested);
+
void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -157,6 +157,13 @@ struct kvm_sregs {
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
+/*
+ * L0-held nested-virtualization state needed to save/resume an L2 guest;
+ * exchanged with userspace via KVM_GET_NESTED / KVM_SET_NESTED.
+ */
+struct kvm_nested {
+	__u8 vmxon;		/* non-zero if the guest has executed VMXON */
+	__u8 pad[7];		/* explicit padding: UAPI structs must not have
+				 * implicit holes (undefined bytes crossing the
+				 * user/kernel boundary) */
+	__u64 vmxon_ptr;	/* guest-physical VMXON region pointer */
+	__u64 current_vmptr;	/* current VMCS12 gpa, or -1ull if none loaded */
+	__u64 vmcs01_debugctl;	/* saved vmcs01 IA32_DEBUGCTL */
+};
+
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
@@ -12419,6 +12419,47 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
return 1;
}
+/*
+ * Restore nested (VMX) state previously captured by vmx_get_nested().
+ * Returns 0 on success, -EINVAL on malformed pointers, -EFAULT if the
+ * cached VMCS12 cannot be re-read from L1 guest memory.
+ */
+static int vmx_set_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested)
+{
+	struct nested_vmx *nvmx = &to_vmx(vcpu)->nested;
+
+	/* Nothing to restore if the guest never executed VMXON. */
+	if (!nested->vmxon)
+		return 0;
+
+	/*
+	 * Validate the userspace-provided pointers before touching any
+	 * nested state, so a malformed request cannot leave the vCPU
+	 * half-configured.  Both the VMXON region and a loaded VMCS must
+	 * be page-aligned, matching handle_vmon()/handle_vmptrld().
+	 */
+	if (!PAGE_ALIGNED(nested->vmxon_ptr))
+		return -EINVAL;
+	if (nested->current_vmptr != -1ull &&
+	    !PAGE_ALIGNED(nested->current_vmptr))
+		return -EINVAL;
+
+	nvmx->vmxon_ptr = nested->vmxon_ptr;
+	enter_vmx_operation(vcpu);
+	nvmx->current_vmptr = nested->current_vmptr;
+	nvmx->vmcs01_debugctl = nested->vmcs01_debugctl;
+
+	if (nested->current_vmptr != -1ull) {
+		/*
+		 * Re-read the cached VMCS12 from L1 memory.  On failure,
+		 * undo the VMXON emulation so no stale nested state
+		 * survives a failed restore.
+		 */
+		if (kvm_read_guest(vcpu->kvm, nvmx->current_vmptr,
+				   nvmx->cached_vmcs12,
+				   sizeof(*nvmx->cached_vmcs12))) {
+			free_nested(to_vmx(vcpu));
+			return -EFAULT;
+		}
+
+		set_current_vmptr(to_vmx(vcpu), nested->current_vmptr);
+	}
+
+	return 0;
+}
+
+/*
+ * Snapshot the L0-held nested (VMX) state into @nested for userspace.
+ * Always zeroes the whole struct first (including padding) and reports
+ * an all-zero snapshot when VMX operation is not active.
+ */
+static int vmx_get_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	memset(nested, 0, sizeof(*nested));
+
+	if (vmx->nested.vmxon) {
+		nested->vmxon = true;
+		nested->vmxon_ptr = vmx->nested.vmxon_ptr;
+		nested->current_vmptr = vmx->nested.current_vmptr;
+		nested->vmcs01_debugctl = vmx->nested.vmcs01_debugctl;
+	}
+
+	return 0;
+}
+
static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -12512,6 +12553,9 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.tlb_flush = vmx_flush_tlb,
+ .set_nested = vmx_set_nested,
+ .get_nested = vmx_get_nested,
+
.run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
@@ -7543,6 +7543,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return r;
}
+/*
+ * Arch-level KVM_GET_NESTED handler.  Returns -EINVAL when the vendor
+ * module does not support nested state save/restore (e.g. SVM, which
+ * leaves ->get_nested NULL — dereferencing it would oops the host).
+ */
+int kvm_arch_vcpu_ioctl_get_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested)
+{
+	int r;
+
+	if (!kvm_x86_ops->get_nested)
+		return -EINVAL;
+
+	/* Pair vcpu_load/vcpu_put like the other arch vcpu ioctls. */
+	vcpu_load(vcpu);
+	r = kvm_x86_ops->get_nested(vcpu, nested);
+	vcpu_put(vcpu);
+
+	return r;
+}
+
+/*
+ * Arch-level KVM_SET_NESTED handler.  Returns -EINVAL when the vendor
+ * module does not support nested state save/restore (e.g. SVM, which
+ * leaves ->set_nested NULL — dereferencing it would oops the host).
+ */
+int kvm_arch_vcpu_ioctl_set_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested)
+{
+	int r;
+
+	if (!kvm_x86_ops->set_nested)
+		return -EINVAL;
+
+	/*
+	 * The restore path touches the loaded VMCS (enter_vmx_operation),
+	 * so the vCPU must be loaded on this pCPU, as with the other
+	 * arch vcpu ioctls.
+	 */
+	vcpu_load(vcpu);
+	r = kvm_x86_ops->set_nested(vcpu, nested);
+	vcpu_put(vcpu);
+
+	return r;
+}
+
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
@@ -776,6 +776,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr);
+int kvm_arch_vcpu_ioctl_get_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested);
+int kvm_arch_vcpu_ioctl_set_nested(struct kvm_vcpu *vcpu, struct kvm_nested *nested);
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
@@ -1467,6 +1467,9 @@ struct kvm_sev_dbg {
__u32 len;
};
+#define KVM_GET_NESTED _IOR(KVMIO, 0xba, struct kvm_nested)
+#define KVM_SET_NESTED _IOW(KVMIO, 0xbb, struct kvm_nested)
+
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
@@ -2661,6 +2661,30 @@ static long kvm_vcpu_ioctl(struct file *filp,
kfree(kvm_regs);
break;
}
+	case KVM_GET_NESTED: {
+		struct kvm_nested nested;
+
+		/*
+		 * Zero the whole struct (including any padding) before the
+		 * arch callback so no uninitialized kernel stack bytes can
+		 * be copied out to userspace.
+		 */
+		memset(&nested, 0, sizeof(nested));
+
+		r = kvm_arch_vcpu_ioctl_get_nested(vcpu, &nested);
+		if (r)
+			break;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &nested, sizeof(nested)))
+			break;
+
+		r = 0;
+		break;
+	}
+	case KVM_SET_NESTED: {
+		struct kvm_nested nested;
+
+		/* Pull the requested nested state in from userspace. */
+		if (copy_from_user(&nested, argp, sizeof(nested))) {
+			r = -EFAULT;
+			break;
+		}
+
+		r = kvm_arch_vcpu_ioctl_set_nested(vcpu, &nested);
+		break;
+	}
case KVM_GET_SREGS: {
kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
r = -ENOMEM;
Since the L2 guest runs directly on the L0 hypervisor, there is some state held by L0 that is needed to safely save and resume L2. The rest of the state is reconstructed from the VMCS12, which is stored in L1 hypervisor memory. NOTE: I am still debugging a VMEntry failure for L2 that occasionally happens after a loop of save and restore. Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/include/uapi/asm/kvm.h | 7 +++++++ arch/x86/kvm/vmx.c | 44 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 10 ++++++++++ include/linux/kvm_host.h | 2 ++ include/uapi/linux/kvm.h | 3 +++ virt/kvm/kvm_main.c | 24 ++++++++++++++++++++++ 7 files changed, 93 insertions(+)