Message ID | 20190626130927.121459-1-liran.alon@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: nVMX: Change KVM_STATE_NESTED_EVMCS to signal vmcs12 is copied from eVMCS | expand |
Gentle ping. > On 26 Jun 2019, at 16:09, Liran Alon <liran.alon@oracle.com> wrote: > > Currently KVM_STATE_NESTED_EVMCS is used to signal that eVMCS > capability is enabled on vCPU. > As indicated by vmx->nested.enlightened_vmcs_enabled. > > This is quite bizarre as userspace VMM should make sure to expose > same vCPU with same CPUID values in both source and destination. > In case vCPU is exposed with eVMCS support on CPUID, it is also > expected to enable KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability. > Therefore, KVM_STATE_NESTED_EVMCS is redundant. > > KVM_STATE_NESTED_EVMCS is currently used on restore path > (vmx_set_nested_state()) only to enable eVMCS capability in KVM > and to signal need_vmcs12_sync such that on next VMEntry to guest > nested_sync_from_vmcs12() will be called to sync vmcs12 content > into eVMCS in guest memory. > However, because restore nested-state is rare enough, we could > have just modified vmx_set_nested_state() to always signal > need_vmcs12_sync. > > From all the above, it seems that we could have just removed > the usage of KVM_STATE_NESTED_EVMCS. However, in order to preserve > backwards migration compatibility, we cannot do that. > (vmx_get_nested_state() needs to signal flag when migrating from > new kernel to old kernel). > > Returning KVM_STATE_NESTED_EVMCS when just vCPU have eVMCS enabled > have a bad side-effect of userspace VMM having to send nested-state > from source to destination as part of migration stream. Even if > guest have never used eVMCS as it doesn't even run a nested > hypervisor workload. This requires destination userspace VMM and > KVM to support setting nested-state. Which make it more difficult > to migrate from new host to older host. > To avoid this, change KVM_STATE_NESTED_EVMCS to signal eVMCS is > not only enabled but also active. i.e. Guest have made some > eVMCS active via an enlightened VMEntry. i.e. vmcs12 is copied > from eVMCS and therefore should be restored into eVMCS resident > in memory (by copy_vmcs12_to_enlightened()). > > Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com> > Reviewed-by: Maran Wilson <maran.wilson@oracle.com> > Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com> > Signed-off-by: Liran Alon <liran.alon@oracle.com> > --- > arch/x86/kvm/vmx/nested.c | 25 ++++++++++++------- > .../testing/selftests/kvm/x86_64/evmcs_test.c | 1 + > 2 files changed, 17 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index b66001fb0232..18efb338ed8a 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -5240,9 +5240,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, > vmx = to_vmx(vcpu); > vmcs12 = get_vmcs12(vcpu); > > - if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled) > - kvm_state.flags |= KVM_STATE_NESTED_EVMCS; > - > if (nested_vmx_allowed(vcpu) && > (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { > kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; > @@ -5251,6 +5248,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, > if (vmx_has_valid_vmcs12(vcpu)) { > kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); > > + if (vmx->nested.hv_evmcs) > + kvm_state.flags |= KVM_STATE_NESTED_EVMCS; > + > if (is_guest_mode(vcpu) && > nested_cpu_has_shadow_vmcs(vmcs12) && > vmcs12->vmcs_link_pointer != -1ull) > @@ -5350,6 +5350,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, > if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) > return -EINVAL; > > + /* > + * KVM_STATE_NESTED_EVMCS used to signal that KVM should > + * enable eVMCS capability on vCPU. However, since then > + * code was changed such that flag signals vmcs12 should > + * be copied into eVMCS in guest memory. > + * > + * To preserve backwards compatability, allow user > + * to set this flag even when there is no VMXON region. > + */ > if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) > return -EINVAL; > } else { > @@ -5358,7 +5367,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, > > if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) > return -EINVAL; > - } > + } > > if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && > (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) > @@ -5383,13 +5392,11 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, > !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) > return -EINVAL; > > - vmx_leave_nested(vcpu); > - if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { > - if (!nested_vmx_allowed(vcpu)) > + if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && > + (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled)) > return -EINVAL; > > - nested_enable_evmcs(vcpu, NULL); > - } > + vmx_leave_nested(vcpu); > > if (kvm_state->hdr.vmx.vmxon_pa == -1ull) > return 0; > diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c > index b38260e29775..241919ef1eac 100644 > --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c > +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c > @@ -146,6 +146,7 @@ int main(int argc, char *argv[]) > kvm_vm_restart(vm, O_RDWR); > vm_vcpu_add(vm, VCPU_ID, 0, 0); > vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); > + vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); > vcpu_load_state(vm, VCPU_ID, state); > run = vcpu_state(vm, VCPU_ID); > free(state); > -- > 2.20.1 >
On 26/06/19 15:09, Liran Alon wrote: > Currently KVM_STATE_NESTED_EVMCS is used to signal that eVMCS > capability is enabled on vCPU. > As indicated by vmx->nested.enlightened_vmcs_enabled. > > This is quite bizarre as userspace VMM should make sure to expose > same vCPU with same CPUID values in both source and destination. > In case vCPU is exposed with eVMCS support on CPUID, it is also > expected to enable KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability. > Therefore, KVM_STATE_NESTED_EVMCS is redundant. > > KVM_STATE_NESTED_EVMCS is currently used on restore path > (vmx_set_nested_state()) only to enable eVMCS capability in KVM > and to signal need_vmcs12_sync such that on next VMEntry to guest > nested_sync_from_vmcs12() will be called to sync vmcs12 content > into eVMCS in guest memory. > However, because restore nested-state is rare enough, we could > have just modified vmx_set_nested_state() to always signal > need_vmcs12_sync. > > From all the above, it seems that we could have just removed > the usage of KVM_STATE_NESTED_EVMCS. However, in order to preserve > backwards migration compatibility, we cannot do that. > (vmx_get_nested_state() needs to signal flag when migrating from > new kernel to old kernel). > > Returning KVM_STATE_NESTED_EVMCS when just vCPU have eVMCS enabled > have a bad side-effect of userspace VMM having to send nested-state > from source to destination as part of migration stream. Even if > guest have never used eVMCS as it doesn't even run a nested > hypervisor workload. This requires destination userspace VMM and > KVM to support setting nested-state. Which make it more difficult > to migrate from new host to older host. > To avoid this, change KVM_STATE_NESTED_EVMCS to signal eVMCS is > not only enabled but also active. i.e. Guest have made some > eVMCS active via an enlightened VMEntry. i.e. vmcs12 is copied > from eVMCS and therefore should be restored into eVMCS resident > in memory (by copy_vmcs12_to_enlightened()). > > Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com> > Reviewed-by: Maran Wilson <maran.wilson@oracle.com> > Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com> > Signed-off-by: Liran Alon <liran.alon@oracle.com> This patch has the same conflict so I will also queue it later. Paolo > --- > arch/x86/kvm/vmx/nested.c | 25 ++++++++++++------- > .../testing/selftests/kvm/x86_64/evmcs_test.c | 1 + > 2 files changed, 17 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index b66001fb0232..18efb338ed8a 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -5240,9 +5240,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, > vmx = to_vmx(vcpu); > vmcs12 = get_vmcs12(vcpu); > > - if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled) > - kvm_state.flags |= KVM_STATE_NESTED_EVMCS; > - > if (nested_vmx_allowed(vcpu) && > (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { > kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; > @@ -5251,6 +5248,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, > if (vmx_has_valid_vmcs12(vcpu)) { > kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); > > + if (vmx->nested.hv_evmcs) > + kvm_state.flags |= KVM_STATE_NESTED_EVMCS; > + > if (is_guest_mode(vcpu) && > nested_cpu_has_shadow_vmcs(vmcs12) && > vmcs12->vmcs_link_pointer != -1ull) > @@ -5350,6 +5350,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, > if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) > return -EINVAL; > > + /* > + * KVM_STATE_NESTED_EVMCS used to signal that KVM should > + * enable eVMCS capability on vCPU. However, since then > + * code was changed such that flag signals vmcs12 should > + * be copied into eVMCS in guest memory. > + * > + * To preserve backwards compatability, allow user > + * to set this flag even when there is no VMXON region. > + */ > if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) > return -EINVAL; > } else { > @@ -5358,7 +5367,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, > > if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) > return -EINVAL; > - } > + } > > if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && > (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) > @@ -5383,13 +5392,11 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, > !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) > return -EINVAL; > > - vmx_leave_nested(vcpu); > - if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { > - if (!nested_vmx_allowed(vcpu)) > + if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && > + (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled)) > return -EINVAL; > > - nested_enable_evmcs(vcpu, NULL); > - } > + vmx_leave_nested(vcpu); > > if (kvm_state->hdr.vmx.vmxon_pa == -1ull) > return 0; > diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c > index b38260e29775..241919ef1eac 100644 > --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c > +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c > @@ -146,6 +146,7 @@ int main(int argc, char *argv[]) > kvm_vm_restart(vm, O_RDWR); > vm_vcpu_add(vm, VCPU_ID, 0, 0); > vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); > + vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); > vcpu_load_state(vm, VCPU_ID, state); > run = vcpu_state(vm, VCPU_ID); > free(state); >
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b66001fb0232..18efb338ed8a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5240,9 +5240,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, vmx = to_vmx(vcpu); vmcs12 = get_vmcs12(vcpu); - if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled) - kvm_state.flags |= KVM_STATE_NESTED_EVMCS; - if (nested_vmx_allowed(vcpu) && (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; @@ -5251,6 +5248,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (vmx_has_valid_vmcs12(vcpu)) { kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); + if (vmx->nested.hv_evmcs) + kvm_state.flags |= KVM_STATE_NESTED_EVMCS; + if (is_guest_mode(vcpu) && nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) @@ -5350,6 +5350,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) return -EINVAL; + /* + * KVM_STATE_NESTED_EVMCS used to signal that KVM should + * enable eVMCS capability on vCPU. However, since then + * code was changed such that flag signals vmcs12 should + * be copied into eVMCS in guest memory. + * + * To preserve backwards compatability, allow user + * to set this flag even when there is no VMXON region. + */ if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) return -EINVAL; } else { @@ -5358,7 +5367,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) return -EINVAL; - } + } if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) @@ -5383,13 +5392,11 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) return -EINVAL; - vmx_leave_nested(vcpu); - if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { - if (!nested_vmx_allowed(vcpu)) + if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && + (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled)) return -EINVAL; - nested_enable_evmcs(vcpu, NULL); - } + vmx_leave_nested(vcpu); if (kvm_state->hdr.vmx.vmxon_pa == -1ull) return 0; diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index b38260e29775..241919ef1eac 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -146,6 +146,7 @@ int main(int argc, char *argv[]) kvm_vm_restart(vm, O_RDWR); vm_vcpu_add(vm, VCPU_ID, 0, 0); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); free(state);