Message ID | 20210210164033.607612-17-vkuznets@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | i386: KVM: expand Hyper-V features early and provide simple 'hv-default=on' option | expand |
On Wed, 10 Feb 2021 17:40:28 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Sometimes we'd like to know which features were explicitly enabled and which > were explicitly disabled on the command line. E.g. it seems logical to handle > 'hv_passthrough,hv_feature=off' as "enable everything supported by the host > except for hv_feature" but this doesn't seem to be possible with the current > 'hyperv_features' bit array. Introduce 'hv_features_on'/'hv_features_off' > add-ons and track explicit enablement/disablement there. > > Note, it doesn't seem to be possible to fill 'hyperv_features' array during > CPU creation time when 'hv-passthrough' is specified and we're running on > an older kernel without KVM_CAP_SYS_HYPERV_CPUID support. To get the list > of the supported Hyper-V features we need to actually create KVM VCPU and > this happens much later. seems to me that we are returning back to +-feat parsing, this time only for hyperv. I'm not sure I like it back, especially considering we are going to drop "-feat" priority for x86. now about impossible, see arm/kvm/virt, they create a 'sample' VCPU at KVM init time to probe for some CPU features in advance. You can use similar approach to prepare value for hyperv_features. > > No functional change intended. > > Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> > --- > target/i386/cpu.c | 237 ++++++++++++++++++++++++++++++++++++++++------ > target/i386/cpu.h | 2 + > 2 files changed, 209 insertions(+), 30 deletions(-) > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index c4e8863c7ca0..e8a004c39d04 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -4553,6 +4553,178 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, const char *name, > cpu->env.tsc_khz = cpu->env.user_tsc_khz = value / 1000; > } > > +static bool x86_hv_feature_get(Object *obj, int feature) > +{ > + X86CPU *cpu = X86_CPU(obj); > + > + return cpu->hyperv_features & BIT(feature); > +} > + > +static void x86_hv_feature_set(Object *obj, bool value, int feature) > +{ > + X86CPU *cpu = X86_CPU(obj); > + > + if (value) { > + cpu->hyperv_features |= BIT(feature); > + cpu->hyperv_features_on |= BIT(feature); > + cpu->hyperv_features_off &= ~BIT(feature); > + } else { > + cpu->hyperv_features &= ~BIT(feature); > + cpu->hyperv_features_on &= ~BIT(feature); > + cpu->hyperv_features_off |= BIT(feature); > + } > +} > + > +static bool x86_hv_relaxed_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_RELAXED); > +} > + > +static void x86_hv_relaxed_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_RELAXED); > +} > + > +static bool x86_hv_vapic_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_VAPIC); > +} > + > +static void x86_hv_vapic_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_VAPIC); > +} > + > +static bool x86_hv_time_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_TIME); > +} > + > +static void x86_hv_time_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_TIME); > +} > + > +static bool x86_hv_crash_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_CRASH); > +} > + > +static void x86_hv_crash_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_CRASH); > +} > + > +static bool x86_hv_reset_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_RESET); > +} > + > +static void x86_hv_reset_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_RESET); > +} > + > +static bool x86_hv_vpindex_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_VPINDEX); > +} > + > +static void x86_hv_vpindex_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_VPINDEX); > +} > + > +static bool x86_hv_runtime_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_RUNTIME); > +} > + > +static void x86_hv_runtime_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_RUNTIME); > +} > + > +static bool x86_hv_synic_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_SYNIC); > +} > + > +static void x86_hv_synic_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_SYNIC); > +} > + > +static bool x86_hv_stimer_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_STIMER); > +} > + > +static void x86_hv_stimer_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_STIMER); > +} > + > +static bool x86_hv_frequencies_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_FREQUENCIES); > +} > + > +static void x86_hv_frequencies_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_FREQUENCIES); > +} > + > +static bool x86_hv_reenlightenment_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_REENLIGHTENMENT); > +} > + > +static void x86_hv_reenlightenment_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_REENLIGHTENMENT); > +} > + > +static bool x86_hv_tlbflush_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_TLBFLUSH); > +} > + > +static void x86_hv_tlbflush_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_TLBFLUSH); > +} > + > +static bool x86_hv_evmcs_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_EVMCS); > +} > + > +static void x86_hv_evmcs_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_EVMCS); > +} > + > +static bool x86_hv_ipi_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_IPI); > +} > + > +static void x86_hv_ipi_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_IPI); > +} > + > +static bool x86_hv_stimer_direct_get(Object *obj, Error **errp) > +{ > + return x86_hv_feature_get(obj, HYPERV_FEAT_STIMER_DIRECT); > +} > + > +static void x86_hv_stimer_direct_set(Object *obj, bool value, Error **errp) > +{ > + x86_hv_feature_set(obj, value, HYPERV_FEAT_STIMER_DIRECT); > +} > + > /* Generic getter for "feature-words" and "filtered-features" properties */ > static void x86_cpu_get_feature_words(Object *obj, Visitor *v, > const char *name, void *opaque, > @@ -7107,36 +7279,6 @@ static Property x86_cpu_properties[] = { > > DEFINE_PROP_UINT32("hv-spinlocks", X86CPU, hyperv_spinlock_attempts, > HYPERV_SPINLOCK_NEVER_NOTIFY), > - DEFINE_PROP_BIT64("hv-relaxed", X86CPU, hyperv_features, > - HYPERV_FEAT_RELAXED, 0), > - DEFINE_PROP_BIT64("hv-vapic", X86CPU, hyperv_features, > - HYPERV_FEAT_VAPIC, 0), > - DEFINE_PROP_BIT64("hv-time", X86CPU, hyperv_features, > - HYPERV_FEAT_TIME, 0), > - DEFINE_PROP_BIT64("hv-crash", X86CPU, hyperv_features, > - HYPERV_FEAT_CRASH, 0), > - DEFINE_PROP_BIT64("hv-reset", X86CPU, hyperv_features, > - HYPERV_FEAT_RESET, 0), > - DEFINE_PROP_BIT64("hv-vpindex", X86CPU, hyperv_features, > - HYPERV_FEAT_VPINDEX, 0), > - DEFINE_PROP_BIT64("hv-runtime", X86CPU, hyperv_features, > - HYPERV_FEAT_RUNTIME, 0), > - DEFINE_PROP_BIT64("hv-synic", X86CPU, hyperv_features, > - HYPERV_FEAT_SYNIC, 0), > - DEFINE_PROP_BIT64("hv-stimer", X86CPU, hyperv_features, > - HYPERV_FEAT_STIMER, 0), > - DEFINE_PROP_BIT64("hv-frequencies", X86CPU, hyperv_features, > - HYPERV_FEAT_FREQUENCIES, 0), > - DEFINE_PROP_BIT64("hv-reenlightenment", X86CPU, hyperv_features, > - HYPERV_FEAT_REENLIGHTENMENT, 0), > - DEFINE_PROP_BIT64("hv-tlbflush", X86CPU, hyperv_features, > - HYPERV_FEAT_TLBFLUSH, 0), > - DEFINE_PROP_BIT64("hv-evmcs", X86CPU, hyperv_features, > - HYPERV_FEAT_EVMCS, 0), > - DEFINE_PROP_BIT64("hv-ipi", X86CPU, hyperv_features, > - HYPERV_FEAT_IPI, 0), > - DEFINE_PROP_BIT64("hv-stimer-direct", X86CPU, hyperv_features, > - HYPERV_FEAT_STIMER_DIRECT, 0), > DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU, > hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF), > DEFINE_PROP_BOOL("hv-passthrough", X86CPU, hyperv_passthrough, false), > @@ -7283,6 +7425,41 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) > x86_cpu_get_crash_info_qom, NULL, NULL, NULL); > #endif > > + object_class_property_add_bool(oc, "hv-relaxed", > + x86_hv_relaxed_get, x86_hv_relaxed_set); > + object_class_property_add_bool(oc, "hv-vapic", > + x86_hv_vapic_get, x86_hv_vapic_set); > + object_class_property_add_bool(oc, "hv-time", > + x86_hv_time_get, x86_hv_time_set); > + object_class_property_add_bool(oc, "hv-crash", > + x86_hv_crash_get, x86_hv_crash_set); > + object_class_property_add_bool(oc, "hv-reset", > + x86_hv_reset_get, x86_hv_reset_set); > + object_class_property_add_bool(oc, "hv-vpindex", > + x86_hv_vpindex_get, x86_hv_vpindex_set); > + object_class_property_add_bool(oc, "hv-runtime", > + x86_hv_runtime_get, x86_hv_runtime_set); > + object_class_property_add_bool(oc, "hv-synic", > + x86_hv_synic_get, x86_hv_synic_set); > + object_class_property_add_bool(oc, "hv-stimer", > + x86_hv_stimer_get, x86_hv_stimer_set); > + object_class_property_add_bool(oc, "hv-frequencies", > + x86_hv_frequencies_get, > + x86_hv_frequencies_set); > + object_class_property_add_bool(oc, "hv-reenlightenment", > + x86_hv_reenlightenment_get, > + x86_hv_reenlightenment_set); > + object_class_property_add_bool(oc, "hv-tlbflush", > + x86_hv_tlbflush_get, x86_hv_tlbflush_set); > + object_class_property_add_bool(oc, "hv-evmcs", > + x86_hv_evmcs_get, > + x86_hv_evmcs_set); > + object_class_property_add_bool(oc, "hv-ipi", > + x86_hv_ipi_get, x86_hv_ipi_set); > + object_class_property_add_bool(oc, "hv-stimer-direct", > + x86_hv_stimer_direct_get, > + x86_hv_stimer_direct_set); > + > for (w = 0; w < FEATURE_WORDS; w++) { > int bitnr; > for (bitnr = 0; bitnr < 64; bitnr++) { > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index 7ea14822aab5..b4fbd46f0fc9 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -1667,6 +1667,8 @@ struct X86CPU { > char *hyperv_vendor; > bool hyperv_synic_kvm_only; > uint64_t hyperv_features; > + uint64_t hyperv_features_on; > + uint64_t hyperv_features_off; > bool hyperv_passthrough; > OnOffAuto hyperv_no_nonarch_cs; > uint32_t hyperv_vendor_id[3];
Igor Mammedov <imammedo@redhat.com> writes: > On Wed, 10 Feb 2021 17:40:28 +0100 > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> Sometimes we'd like to know which features were explicitly enabled and which >> were explicitly disabled on the command line. E.g. it seems logical to handle >> 'hv_passthrough,hv_feature=off' as "enable everything supported by the host >> except for hv_feature" but this doesn't seem to be possible with the current >> 'hyperv_features' bit array. Introduce 'hv_features_on'/'hv_features_off' >> add-ons and track explicit enablement/disablement there. >> >> Note, it doesn't seem to be possible to fill 'hyperv_features' array during >> CPU creation time when 'hv-passthrough' is specified and we're running on >> an older kernel without KVM_CAP_SYS_HYPERV_CPUID support. To get the list >> of the supported Hyper-V features we need to actually create KVM VCPU and >> this happens much later. > > seems to me that we are returning back to +-feat parsing, this time only for > hyperv. > I'm not sure I like it back, especially considering we are going to > drop "-feat" priority for x86. > > now about impossible, see arm/kvm/virt, they create a 'sample' VCPU at KVM > init time to probe for some CPU features in advance. You can use similar > approach to prepare value for hyperv_features. > KVM_CAP_SYS_HYPERV_CPUID is supported since 5.11 and eventually it'll make it to all kernels we care about so I'd really like to avoid any 'sample' CPUs for the time being. On/off parsing looks like a much lesser evil.
On Fri, 12 Feb 2021 09:45:52 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Igor Mammedov <imammedo@redhat.com> writes: > > > On Wed, 10 Feb 2021 17:40:28 +0100 > > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > > > >> Sometimes we'd like to know which features were explicitly enabled and which > >> were explicitly disabled on the command line. E.g. it seems logical to handle > >> 'hv_passthrough,hv_feature=off' as "enable everything supported by the host > >> except for hv_feature" but this doesn't seem to be possible with the current > >> 'hyperv_features' bit array. Introduce 'hv_features_on'/'hv_features_off' > >> add-ons and track explicit enablement/disablement there. > >> > >> Note, it doesn't seem to be possible to fill 'hyperv_features' array during > >> CPU creation time when 'hv-passthrough' is specified and we're running on > >> an older kernel without KVM_CAP_SYS_HYPERV_CPUID support. To get the list > >> of the supported Hyper-V features we need to actually create KVM VCPU and > >> this happens much later. > > > > seems to me that we are returning back to +-feat parsing, this time only for > > hyperv. > > I'm not sure I like it back, especially considering we are going to > > drop "-feat" priority for x86. > > > > now about impossible, see arm/kvm/virt, they create a 'sample' VCPU at KVM > > init time to probe for some CPU features in advance. You can use similar > > approach to prepare value for hyperv_features. > > > > KVM_CAP_SYS_HYPERV_CPUID is supported since 5.11 and eventually it'll > make it to all kernels we care about so I'd really like to avoid any > 'sample' CPUs for the time being. On/off parsing looks like a much > lesser evil. When minimum supported by QEMU kernel version gets there, you can remove scratch CPU in QEMU (if hyperv will remain its sole user). writing your own property parser like in this series, is possible too but it adds extra fields to track state and hard to follow logic. On top it adds a lot of churn by switching hv_ features to dynamic properties, which is not necessary if scratch CPU approach is used. Please try reusing scratch CPU approach, see kvm_arm_get_host_cpu_features() for an example. You will very likely end up with simpler series, compared to reinventing wheel. in proto would look like: * kvm_init: hv_passthrough_cached = scratch_cpu->hyperv_features * property parsing time: x86_hv_passthrough_set() cpu->hyperv_features = hv_passthrough_cached all other features handled by generic property parsing, you don't have to do any special handling for them. * cpu_relalize() hv_expand() to check for dependencies, conflicts availability of features.
Igor Mammedov <imammedo@redhat.com> writes: > On Fri, 12 Feb 2021 09:45:52 +0100 > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> Igor Mammedov <imammedo@redhat.com> writes: >> >> > On Wed, 10 Feb 2021 17:40:28 +0100 >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: >> > >> >> Sometimes we'd like to know which features were explicitly enabled and which >> >> were explicitly disabled on the command line. E.g. it seems logical to handle >> >> 'hv_passthrough,hv_feature=off' as "enable everything supported by the host >> >> except for hv_feature" but this doesn't seem to be possible with the current >> >> 'hyperv_features' bit array. Introduce 'hv_features_on'/'hv_features_off' >> >> add-ons and track explicit enablement/disablement there. >> >> >> >> Note, it doesn't seem to be possible to fill 'hyperv_features' array during >> >> CPU creation time when 'hv-passthrough' is specified and we're running on >> >> an older kernel without KVM_CAP_SYS_HYPERV_CPUID support. To get the list >> >> of the supported Hyper-V features we need to actually create KVM VCPU and >> >> this happens much later. >> > >> > seems to me that we are returning back to +-feat parsing, this time only for >> > hyperv. >> > I'm not sure I like it back, especially considering we are going to >> > drop "-feat" priority for x86. >> > >> > now about impossible, see arm/kvm/virt, they create a 'sample' VCPU at KVM >> > init time to probe for some CPU features in advance. You can use similar >> > approach to prepare value for hyperv_features. >> > >> >> KVM_CAP_SYS_HYPERV_CPUID is supported since 5.11 and eventually it'll >> make it to all kernels we care about so I'd really like to avoid any >> 'sample' CPUs for the time being. On/off parsing looks like a much >> lesser evil. > When minimum supported by QEMU kernel version gets there, you can remove > scratch CPU in QEMU (if hyperv will remain its sole user). > > writing your own property parser like in this series, is possible too > but it adds extra fields to track state and hard to follow logic. > On top it adds a lot of churn by switching hv_ features to dynamic > properties, which is not necessary if scratch CPU approach is used. > > Please try reusing scratch CPU approach, see > kvm_arm_get_host_cpu_features() > for an example. You will very likely end up with simpler series, > compared to reinventing wheel. Even if I do that (and I serioulsy doubt it's going to be easier than just adding two 'u64's, kvm_arm_get_host_cpu_features() alone is 200 lines long) this is not going to give us what we need to distinguish between 'hv-passthrough,hv-evmcs' and 'hv-passthrough' when 'hv-evmcs' *is* supported by the host. When guest CPU lacks VMX we don't want to enable it unless it was requested explicitly (former but not the later). Moreover, instead of just adding two 'u64's we're now doing an ioctl which can fail, be subject to limits,... Creating and destroying a CPU is also slow. Sorry, I hardly see how this is better, maybe just from 'code purity' point of view.
Vitaly Kuznetsov <vkuznets@redhat.com> writes: > Igor Mammedov <imammedo@redhat.com> writes: > >> >> Please try reusing scratch CPU approach, see >> kvm_arm_get_host_cpu_features() >> for an example. You will very likely end up with simpler series, >> compared to reinventing wheel. > > Even if I do that (and I serioulsy doubt it's going to be easier than > just adding two 'u64's, kvm_arm_get_host_cpu_features() alone is 200 > lines long) this is not going to give us what we need to distinguish > between > > 'hv-passthrough,hv-evmcs' > > and > > 'hv-passthrough' > > when 'hv-evmcs' *is* supported by the host. When guest CPU lacks VMX we > don't want to enable it unless it was requested explicitly (former but > not the later). ... and if for whatever reason we decide that this is also bad/not needed, I can just drop patches 16-18 from the series (leaving 'hv-passthrough,hv-feature=off' problem to better times).
On Fri, 12 Feb 2021 16:19:24 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Igor Mammedov <imammedo@redhat.com> writes: > > > On Fri, 12 Feb 2021 09:45:52 +0100 > > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > > > >> Igor Mammedov <imammedo@redhat.com> writes: > >> > >> > On Wed, 10 Feb 2021 17:40:28 +0100 > >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> > > >> >> Sometimes we'd like to know which features were explicitly enabled and which > >> >> were explicitly disabled on the command line. E.g. it seems logical to handle > >> >> 'hv_passthrough,hv_feature=off' as "enable everything supported by the host > >> >> except for hv_feature" but this doesn't seem to be possible with the current > >> >> 'hyperv_features' bit array. Introduce 'hv_features_on'/'hv_features_off' > >> >> add-ons and track explicit enablement/disablement there. > >> >> > >> >> Note, it doesn't seem to be possible to fill 'hyperv_features' array during > >> >> CPU creation time when 'hv-passthrough' is specified and we're running on > >> >> an older kernel without KVM_CAP_SYS_HYPERV_CPUID support. To get the list > >> >> of the supported Hyper-V features we need to actually create KVM VCPU and > >> >> this happens much later. > >> > > >> > seems to me that we are returning back to +-feat parsing, this time only for > >> > hyperv. > >> > I'm not sure I like it back, especially considering we are going to > >> > drop "-feat" priority for x86. > >> > > >> > now about impossible, see arm/kvm/virt, they create a 'sample' VCPU at KVM > >> > init time to probe for some CPU features in advance. You can use similar > >> > approach to prepare value for hyperv_features. > >> > > >> > >> KVM_CAP_SYS_HYPERV_CPUID is supported since 5.11 and eventually it'll > >> make it to all kernels we care about so I'd really like to avoid any > >> 'sample' CPUs for the time being. On/off parsing looks like a much > >> lesser evil. > > When minimum supported by QEMU kernel version gets there, you can remove > > scratch CPU in QEMU (if hyperv will remain its sole user). > > > > writing your own property parser like in this series, is possible too > > but it adds extra fields to track state and hard to follow logic. > > On top it adds a lot of churn by switching hv_ features to dynamic > > properties, which is not necessary if scratch CPU approach is used. > > > > Please try reusing scratch CPU approach, see > > kvm_arm_get_host_cpu_features() > > for an example. You will very likely end up with simpler series, > > compared to reinventing wheel. > > Even if I do that (and I serioulsy doubt it's going to be easier than > just adding two 'u64's, kvm_arm_get_host_cpu_features() alone is 200 it does a lot more then what you need, kvm_arm_create_scratch_host_vcpu() which it uses will do the job and even that could be made smaller for hv usecase. > lines long) this is not going to give us what we need to distinguish > between > > 'hv-passthrough,hv-evmcs' > > and > > 'hv-passthrough' > > when 'hv-evmcs' *is* supported by the host. When guest CPU lacks VMX we > don't want to enable it unless it was requested explicitly (former but > not the later). could you elaborate more on it, i.e. why do we need to distinguish and why do we need evmcs without VMX if user asked for it (will it be usable) > Moreover, instead of just adding two 'u64's we're now doing an ioctl > which can fail, be subject to limits,... Creating and destroying a CPU > is also slow. Sorry, I hardly see how this is better, maybe just from > 'code purity' point of view. readable and easy to maintain code is not a thing to neglect.
On Fri, 12 Feb 2021 16:26:03 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Vitaly Kuznetsov <vkuznets@redhat.com> writes: > > > Igor Mammedov <imammedo@redhat.com> writes: > > > >> > >> Please try reusing scratch CPU approach, see > >> kvm_arm_get_host_cpu_features() > >> for an example. You will very likely end up with simpler series, > >> compared to reinventing wheel. > > > > Even if I do that (and I serioulsy doubt it's going to be easier than > > just adding two 'u64's, kvm_arm_get_host_cpu_features() alone is 200 > > lines long) this is not going to give us what we need to distinguish > > between > > > > 'hv-passthrough,hv-evmcs' > > > > and > > > > 'hv-passthrough' > > > > when 'hv-evmcs' *is* supported by the host. When guest CPU lacks VMX we > > don't want to enable it unless it was requested explicitly (former but > > not the later). > > ... and if for whatever reason we decide that this is also bad/not > needed, I can just drop patches 16-18 from the series (leaving > 'hv-passthrough,hv-feature=off' problem to better times). that's also an option, we would need to make sure that hv-passthrough is mutually exclusive with ''all'' other hv- properties to avoid above combination being ever (mis)used.
Igor Mammedov <imammedo@redhat.com> writes: >> > >> > Please try reusing scratch CPU approach, see >> > kvm_arm_get_host_cpu_features() >> > for an example. You will very likely end up with simpler series, >> > compared to reinventing wheel. >> >> Even if I do that (and I serioulsy doubt it's going to be easier than >> just adding two 'u64's, kvm_arm_get_host_cpu_features() alone is 200 > it does a lot more then what you need, kvm_arm_create_scratch_host_vcpu() > which it uses will do the job and even that could be made smaller > for hv usecase. > >> lines long) this is not going to give us what we need to distinguish >> between >> >> 'hv-passthrough,hv-evmcs' >> >> and >> >> 'hv-passthrough' >> >> when 'hv-evmcs' *is* supported by the host. When guest CPU lacks VMX we >> don't want to enable it unless it was requested explicitly (former but >> not the later). > could you elaborate more on it, i.e. why do we need to distinguish and why > do we need evmcs without VMX if user asked for it (will it be usable) > We need to distinguish because that would be sane. Enlightened VMCS is an extension to VMX, it can't be used without it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, it comes with nesting (-ExposeVirtualizationExtensions $true). When we create a default set of Hyper-V enlightenments (either 'hv-default' or 'hv-passthrough') we should be as close as possible to genuine Hyper-V to not create unsupported Frankenstiens which can break with any Windows update (because nobody tested these configurations). That bein said, if guest CPU lacks VMX it is counter-productive to expose EVMCS. However, there is a problem with explicit enablement: what should 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't sound sane to me. >> Moreover, instead of just adding two 'u64's we're now doing an ioctl >> which can fail, be subject to limits,... Creating and destroying a CPU >> is also slow. Sorry, I hardly see how this is better, maybe just from >> 'code purity' point of view. > readable and easy to maintain code is not a thing to neglect. Of couse, but 'scratch CPU' idea is not a good design decision, it is an ugly hack we should get rid of in ARM land, not try bringing it to other architectures. Generally, KVM should allow to query all its capabilities without the need to create a vCPU or, if not possible, we should create 'real' QEMU VCPUs and use one/all of the to query capabilities, avoiding 'scratch' because: - Creating and destroying a vCPU makes VM startup slower, much slower. E.g. for a single-CPU VM you're doubling the time required to create vCPUs! - vCPUs in KVM are quite memory consuming. Just 'struct kvm_vcpu_arch' was something like 12kb last time I looked at it. I have no clue why scratch vCPUs were implemented on ARM, however, I'd very much want us to avoid doing the same on x86. We do have use-cases where startup time and consumed memory is important. There is a point in limiting ioctls for security reasons (e.g. if I'm creating a single vCPU VM I may want to limit userspace process to one and only one KVM_CREATE_VCPU call). Now to the code you complain about. The 'hard to read and maintain' code is literaly this: +static void x86_hv_feature_set(Object *obj, bool value, int feature) +{ + X86CPU *cpu = X86_CPU(obj); + + if (value) { + cpu->hyperv_features |= BIT(feature); + cpu->hyperv_features_on |= BIT(feature); + cpu->hyperv_features_off &= ~BIT(feature); + } else { + cpu->hyperv_features &= ~BIT(feature); + cpu->hyperv_features_on &= ~BIT(feature); + cpu->hyperv_features_off |= BIT(feature); + } +} I can add as many comments here as needed, however, I don't see what requires additional explanaition. We just want to know two things: - What's the 'effective' setting of the control - Was it explicitly enabled or disabled on the command line. Custom parsers are not new in QEMU and they're not going anywhere I believe. There are options with simple enablent and there are some with additional considerations. Trying to make CPU objects somewhat 'special' by forcing all options to be of type-1 (and thus crippling user experience) is not the way to go IMHO. I'd very much like us to go in another direction, make our option parser better so my very simple use-case is covered 'out-of-the-box'.
Igor Mammedov <imammedo@redhat.com> writes: > On Fri, 12 Feb 2021 16:26:03 +0100 > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: >> >> > Igor Mammedov <imammedo@redhat.com> writes: >> > >> >> >> >> Please try reusing scratch CPU approach, see >> >> kvm_arm_get_host_cpu_features() >> >> for an example. You will very likely end up with simpler series, >> >> compared to reinventing wheel. >> > >> > Even if I do that (and I serioulsy doubt it's going to be easier than >> > just adding two 'u64's, kvm_arm_get_host_cpu_features() alone is 200 >> > lines long) this is not going to give us what we need to distinguish >> > between >> > >> > 'hv-passthrough,hv-evmcs' >> > >> > and >> > >> > 'hv-passthrough' >> > >> > when 'hv-evmcs' *is* supported by the host. When guest CPU lacks VMX we >> > don't want to enable it unless it was requested explicitly (former but >> > not the later). >> >> ... and if for whatever reason we decide that this is also bad/not >> needed, I can just drop patches 16-18 from the series (leaving >> 'hv-passthrough,hv-feature=off' problem to better times). > that's also an option, > we would need to make sure that hv-passthrough is mutually exclusive > with ''all'' other hv- properties to avoid above combination being > ever (mis)used. That's an option to finally get these patches merged, not a good option for end users. 'hv-passthrough,hv-feature' works today and it's useful. Should we drop it? 'hv-passthrough/hv-default' and 'hv-passthrough/hv-default,hv-evmcs' should give us sane results. 'hv-passthrough,hv-feature=off' is convenient. Why droppping this all? To save 9 (nine) lines of code in the parser?
On Mon, Feb 15, 2021 at 09:53:50AM +0100, Vitaly Kuznetsov wrote:
> I have no clue why scratch vCPUs were implemented on ARM, however, I'd
We don't have an ioctl like KVM_GET_SUPPORTED_CPUID, which operates on
the KVM fd. Perhaps we should.
Thanks,
drew
On Mon, 15 Feb 2021 09:56:19 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Igor Mammedov <imammedo@redhat.com> writes: > > > On Fri, 12 Feb 2021 16:26:03 +0100 > > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > > > >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: > >> > >> > Igor Mammedov <imammedo@redhat.com> writes: > >> > > >> >> > >> >> Please try reusing scratch CPU approach, see > >> >> kvm_arm_get_host_cpu_features() > >> >> for an example. You will very likely end up with simpler series, > >> >> compared to reinventing wheel. > >> > > >> > Even if I do that (and I serioulsy doubt it's going to be easier than > >> > just adding two 'u64's, kvm_arm_get_host_cpu_features() alone is 200 > >> > lines long) this is not going to give us what we need to distinguish > >> > between > >> > > >> > 'hv-passthrough,hv-evmcs' > >> > > >> > and > >> > > >> > 'hv-passthrough' > >> > > >> > when 'hv-evmcs' *is* supported by the host. When guest CPU lacks VMX we > >> > don't want to enable it unless it was requested explicitly (former but > >> > not the later). > >> > >> ... and if for whatever reason we decide that this is also bad/not > >> needed, I can just drop patches 16-18 from the series (leaving > >> 'hv-passthrough,hv-feature=off' problem to better times). > > that's also an option, > > we would need to make sure that hv-passthrough is mutually exclusive > > with ''all'' other hv- properties to avoid above combination being > > ever (mis)used. > > That's an option to finally get these patches merged, not a good option > for end users. > > 'hv-passthrough,hv-feature' works today and it's useful. Should we drop > it? well, try suggested idea about using scratch CPU and it might get merged sooner. (it's not like I'm suggesting you to rewrite half of QEMU, just some of patches, which most likely would simplify series from my point of view and would be easier to maintain) > > 'hv-passthrough/hv-default' and 'hv-passthrough/hv-default,hv-evmcs' > should give us sane results. > > 'hv-passthrough,hv-feature=off' is convenient. > > Why droppping this all? To save 9 (nine) lines of code in the parser? it's doing what generic property parsing is capable off, provided you fish out hv-passthrough value in advance like arm/virt does (I think ppc does similar hing also), so I consider it as unnecessary code duplication/ complication and maintenance burden. If it were a hotfix during hard-freeze may be I'd agree (with promise to rework it later to something more palatable), but it's not, for patches in state they are now I'm not confident enough to ACK them.
On Mon, 15 Feb 2021 09:53:50 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Igor Mammedov <imammedo@redhat.com> writes: > > >> > > >> > Please try reusing scratch CPU approach, see > >> > kvm_arm_get_host_cpu_features() > >> > for an example. You will very likely end up with simpler series, > >> > compared to reinventing wheel. > >> > >> Even if I do that (and I serioulsy doubt it's going to be easier than > >> just adding two 'u64's, kvm_arm_get_host_cpu_features() alone is 200 > > it does a lot more then what you need, kvm_arm_create_scratch_host_vcpu() > > which it uses will do the job and even that could be made smaller > > for hv usecase. > > > >> lines long) this is not going to give us what we need to distinguish > >> between > >> > >> 'hv-passthrough,hv-evmcs' > >> > >> and > >> > >> 'hv-passthrough' > >> > >> when 'hv-evmcs' *is* supported by the host. When guest CPU lacks VMX we > >> don't want to enable it unless it was requested explicitly (former but > >> not the later). > > could you elaborate more on it, i.e. why do we need to distinguish and why > > do we need evmcs without VMX if user asked for it (will it be usable) > > > > We need to distinguish because that would be sane. > > Enlightened VMCS is an extension to VMX, it can't be used without > it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, ... > That bein said, if > guest CPU lacks VMX it is counter-productive to expose EVMCS. However, > there is a problem with explicit enablement: what should > > 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't > sound sane to me. based on above I'd error out is user asks for unsupported option i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out if later on we find usecase for VMX=off + hv-evmcs=on, we will be able to drop error without affecting existing users, but not other way around. > >> Moreover, instead of just adding two 'u64's we're now doing an ioctl > >> which can fail, be subject to limits,... Creating and destroying a CPU > >> is also slow. Sorry, I hardly see how this is better, maybe just from > >> 'code purity' point of view. > > readable and easy to maintain code is not a thing to neglect. > > Of couse, but 'scratch CPU' idea is not a good design decision, it is an > ugly hack we should get rid of in ARM land, not try bringing it to other > architectures. Generally, KVM should allow to query all its capabilities > without the need to create a vCPU or, if not possible, we should create > 'real' QEMU VCPUs and use one/all of the to query capabilities, avoiding > 'scratch' because: > - Creating and destroying a vCPU makes VM startup slower, much > slower. E.g. for a single-CPU VM you're doubling the time required to > create vCPUs! > - vCPUs in KVM are quite memory consuming. Just 'struct kvm_vcpu_arch' > was something like 12kb last time I looked at it. > > I have no clue why scratch vCPUs were implemented on ARM, however, I'd > very much want us to avoid doing the same on x86. We do have use-cases > where startup time and consumed memory is important. There is a point in > limiting ioctls for security reasons (e.g. if I'm creating a single vCPU > VM I may want to limit userspace process to one and only one > KVM_CREATE_VCPU call). it should be possible to reuse scratch VCPU (kvm file descriptor) as the first CPU of VM, if there is a will/need, without creating unnecessary overhead. I don't like scratch CPU either but from my pov it's a lesser evil to spawning custom parser every time someone fills like it. > Now to the code you complain about. The 'hard to read and maintain' code > is literaly this: > > +static void x86_hv_feature_set(Object *obj, bool value, int feature) > +{ > + X86CPU *cpu = X86_CPU(obj); > + > + if (value) { > + cpu->hyperv_features |= BIT(feature); > + cpu->hyperv_features_on |= BIT(feature); > + cpu->hyperv_features_off &= ~BIT(feature); > + } else { > + cpu->hyperv_features &= ~BIT(feature); > + cpu->hyperv_features_on &= ~BIT(feature); > + cpu->hyperv_features_off |= BIT(feature); > + } > +} It's not just that code but the rest that uses above variables to get final hyperv_features feature set. There is a lot of invariants that are hidden in hv specific code that you put in hyperv kvm specific part. btw why can't we get supported hyperv_features in passthrough mode during time we initialize KVM (without a vCPU)? > I can add as many comments here as needed, however, I don't see what > requires additional explanaition. We just want to know two things: > - What's the 'effective' setting of the control > - Was it explicitly enabled or disabled on the command line. > > Custom parsers are not new in QEMU and they're not going anywhere I > believe. There are options with simple enablent and there are some with > additional considerations. Trying to make CPU objects somewhat 'special' > by forcing all options to be of type-1 (and thus crippling user > experience) is not the way to go IMHO. I'd very much like us to go in > another direction, make our option parser better so my very simple > use-case is covered 'out-of-the-box'. there is a lot of effort spent on getting rid of custom parsers that QEMU accumulated over years. Probably there were good reasons to add them back then, and now someone else has to spend time to clean them up. hyperv case is not any special in that regard (at least I'm not convinced at this point). Try alternative(s) first, if that doesn't work out, then custom parser might be necessary.
On Mon, 15 Feb 2021 16:55:02 +0100 Igor Mammedov <imammedo@redhat.com> wrote: > On Mon, 15 Feb 2021 09:56:19 +0100 > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > > > Igor Mammedov <imammedo@redhat.com> writes: > > > > > On Fri, 12 Feb 2021 16:26:03 +0100 > > > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > > > > > >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: > > >> > > >> > Igor Mammedov <imammedo@redhat.com> writes: > > >> > [...] > >(I think ppc does similar hing also) well scratch that off, I can't find PPC part anymore. Maybe I've confused that with something else. [...]
Igor Mammedov <imammedo@redhat.com> writes: >> >> We need to distinguish because that would be sane. >> >> Enlightened VMCS is an extension to VMX, it can't be used without >> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, > ... >> That bein said, if >> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, >> there is a problem with explicit enablement: what should >> >> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't >> sound sane to me. > based on above I'd error out is user asks for unsupported option > i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out That's what I keep telling you but you don't seem to listen. 'Scratch CPU' can't possibly help with this use-case because when you parse 'hv-passthrough,hv-evmcs,vmx=off' you 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the host. 2) 'hv-evmcs' -> keep EVMCS bit '1' 3) 'vmx=off' -> you have no idea where EVMCS bit came from. We have to remember which options were aquired from the host and which were set explicitly by the user. Ok, you can replace 'hyperv_features_on' with 'evmcs_was_explicitly_requested' but how is it better? > > if later on we find usecase for VMX=off + hv-evmcs=on, > we will be able to drop error without affecting existing users, > but not other way around. > >> >> Moreover, instead of just adding two 'u64's we're now doing an ioctl >> >> which can fail, be subject to limits,... Creating and destroying a CPU >> >> is also slow. Sorry, I hardly see how this is better, maybe just from >> >> 'code purity' point of view. >> > readable and easy to maintain code is not a thing to neglect. >> >> Of couse, but 'scratch CPU' idea is not a good design decision, it is an >> ugly hack we should get rid of in ARM land, not try bringing it to other >> architectures. Generally, KVM should allow to query all its capabilities >> without the need to create a vCPU or, if not possible, we should create >> 'real' QEMU VCPUs and use one/all of the to query capabilities, avoiding >> 'scratch' because: >> - Creating and destroying a vCPU makes VM startup slower, much >> slower. E.g. for a single-CPU VM you're doubling the time required to >> create vCPUs! >> - vCPUs in KVM are quite memory consuming. Just 'struct kvm_vcpu_arch' >> was something like 12kb last time I looked at it. >> >> I have no clue why scratch vCPUs were implemented on ARM, however, I'd >> very much want us to avoid doing the same on x86. We do have use-cases >> where startup time and consumed memory is important. There is a point in >> limiting ioctls for security reasons (e.g. if I'm creating a single vCPU >> VM I may want to limit userspace process to one and only one >> KVM_CREATE_VCPU call). > it should be possible to reuse scratch VCPU (kvm file descriptor) as > the first CPU of VM, if there is a will/need, without creating unnecessary overhead. > I don't like scratch CPU either but from my pov it's a lesser evil to > spawning custom parser every time someone fills like it. I respectfully disagree. > > >> Now to the code you complain about. The 'hard to read and maintain' code >> is literaly this: >> >> +static void x86_hv_feature_set(Object *obj, bool value, int feature) >> +{ >> + X86CPU *cpu = X86_CPU(obj); >> + >> + if (value) { >> + cpu->hyperv_features |= BIT(feature); >> + cpu->hyperv_features_on |= BIT(feature); >> + cpu->hyperv_features_off &= ~BIT(feature); >> + } else { >> + cpu->hyperv_features &= ~BIT(feature); >> + cpu->hyperv_features_on &= ~BIT(feature); >> + cpu->hyperv_features_off |= BIT(feature); >> + } >> +} > It's not just that code but the rest that uses above variables to > get final hyperv_features feature set. There is a lot of invariants > that are hidden in hv specific code that you put in hyperv kvm > specific part. Could you give an example please? > > btw why can't we get supported hyperv_features in passthrough mode > during time we initialize KVM (without a vCPU)? I think I already explained that: KVM_GET_SUPPORTED_HV_CPUID works on KVM fd from 5.11, it requires a vCPU prior to that. > >> I can add as many comments here as needed, however, I don't see what >> requires additional explanaition. We just want to know two things: >> - What's the 'effective' setting of the control >> - Was it explicitly enabled or disabled on the command line. >> >> Custom parsers are not new in QEMU and they're not going anywhere I >> believe. There are options with simple enablent and there are some with >> additional considerations. Trying to make CPU objects somewhat 'special' >> by forcing all options to be of type-1 (and thus crippling user >> experience) is not the way to go IMHO. I'd very much like us to go in >> another direction, make our option parser better so my very simple >> use-case is covered 'out-of-the-box'. > there is a lot of effort spent on getting rid of custom parsers that > QEMU accumulated over years. Probably there were good reasons to add > them back then, and now someone else has to spend time to clean them up. > > hyperv case is not any special in that regard (at least I'm not convinced > at this point). Try alternative(s) first, if that doesn't work out, then > custom parser might be necessary. Please explain to me how 'hv-passthrough,hv-evmcs,-vmx' is going to throw an error and 'hv-passthrough,-vmx' is not.
Igor Mammedov <imammedo@redhat.com> writes: > On Mon, 15 Feb 2021 09:56:19 +0100 > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> Igor Mammedov <imammedo@redhat.com> writes: >> >> > On Fri, 12 Feb 2021 16:26:03 +0100 >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: >> > >> >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: >> >> >> >> > Igor Mammedov <imammedo@redhat.com> writes: >> >> > >> >> >> >> >> >> Please try reusing scratch CPU approach, see >> >> >> kvm_arm_get_host_cpu_features() >> >> >> for an example. You will very likely end up with simpler series, >> >> >> compared to reinventing wheel. >> >> > >> >> > Even if I do that (and I serioulsy doubt it's going to be easier than >> >> > just adding two 'u64's, kvm_arm_get_host_cpu_features() alone is 200 >> >> > lines long) this is not going to give us what we need to distinguish >> >> > between >> >> > >> >> > 'hv-passthrough,hv-evmcs' >> >> > >> >> > and >> >> > >> >> > 'hv-passthrough' >> >> > >> >> > when 'hv-evmcs' *is* supported by the host. When guest CPU lacks VMX we >> >> > don't want to enable it unless it was requested explicitly (former but >> >> > not the later). >> >> >> >> ... and if for whatever reason we decide that this is also bad/not >> >> needed, I can just drop patches 16-18 from the series (leaving >> >> 'hv-passthrough,hv-feature=off' problem to better times). >> > that's also an option, >> > we would need to make sure that hv-passthrough is mutually exclusive >> > with ''all'' other hv- properties to avoid above combination being >> > ever (mis)used. >> >> That's an option to finally get these patches merged, not a good option >> for end users. >> >> 'hv-passthrough,hv-feature' works today and it's useful. Should we drop >> it? > well, > try suggested idea about using scratch CPU and it might get merged sooner. > (it's not like I'm suggesting you to rewrite half of QEMU, just some of > patches, which most likely would simplify series from my point of view > and would be easier to maintain) > I don't see anything in the series which will go away if I implement this idea but as I hate it deerly I'm likely not going to.
Vitaly Kuznetsov <vkuznets@redhat.com> writes: > Igor Mammedov <imammedo@redhat.com> writes: > >>> >>> We need to distinguish because that would be sane. >>> >>> Enlightened VMCS is an extension to VMX, it can't be used without >>> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, >> ... >>> That bein said, if >>> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, >>> there is a problem with explicit enablement: what should >>> >>> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't >>> sound sane to me. >> based on above I'd error out is user asks for unsupported option >> i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out > > That's what I keep telling you but you don't seem to listen. 'Scratch > CPU' can't possibly help with this use-case because when you parse > > 'hv-passthrough,hv-evmcs,vmx=off' you > > 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the > host. > > 2) 'hv-evmcs' -> keep EVMCS bit '1' > > 3) 'vmx=off' -> you have no idea where EVMCS bit came from. > > We have to remember which options were aquired from the host and which > were set explicitly by the user. Igor, could you please comment on the above? In case my line of thought is correct, and it is impossible to distinguish between e.g. 'hv-passthrough,hv-evmcs,-vmx' and 'hv-passthrough,-vmx' without a custom parser (written just exactly the way I did in this version, for example) regardless of when 'hv-passthrough' is expanded. E.g. we have the exact same problem with 'hv-default,hv-evmcs,-vmx'. I that case I see no point in discussing 'scratch CPUs' idea at this point because it is not going to change anything at all ('hv_features_on' will stay, custom parsers will stay). Am I missing something?
On Mon, 22 Feb 2021 11:20:34 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Vitaly Kuznetsov <vkuznets@redhat.com> writes: > > > Igor Mammedov <imammedo@redhat.com> writes: > > > >>> > >>> We need to distinguish because that would be sane. > >>> > >>> Enlightened VMCS is an extension to VMX, it can't be used without > >>> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, > >> ... > >>> That bein said, if > >>> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, > >>> there is a problem with explicit enablement: what should > >>> > >>> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't > >>> sound sane to me. > >> based on above I'd error out is user asks for unsupported option > >> i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out > > > > That's what I keep telling you but you don't seem to listen. 'Scratch > > CPU' can't possibly help with this use-case because when you parse > > > > 'hv-passthrough,hv-evmcs,vmx=off' you > > > > 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the > > host. > > > > 2) 'hv-evmcs' -> keep EVMCS bit '1' > > > > 3) 'vmx=off' -> you have no idea where EVMCS bit came from. > > > > We have to remember which options were aquired from the host and which > > were set explicitly by the user. > > Igor, > > could you please comment on the above? In case my line of thought is > correct, and it is impossible to distinguish between e.g. > > 'hv-passthrough,hv-evmcs,-vmx' > and > 'hv-passthrough,-vmx' > > without a custom parser (written just exactly the way I did in this > version, for example) regardless of when 'hv-passthrough' is > expanded. E.g. we have the exact same problem with > 'hv-default,hv-evmcs,-vmx'. I that case I see no point in discussing right, if we need to distinguish between explicit and implicit hv-evmcs set by hv-passthrough custom parser probably the way to go. However do we need actually need to do it? I'd treat 'hv-passthrough,-vmx' the same way as 'hv-passthrough,hv-evmcs,-vmx' and it applies not only hv-evmcs but other features hv-passthrough might set (i.e. if whatever was [un]set by hv-passthrough in combination with other features results in invalid config, QEMU shall error out instead of magically altering host provided hv-passthrough value). something like: 'hv-passthrough,-vmx' when hv-passthrough makes hv-evmcs bit set should result in error_setg(errp,"'vmx' feature can't be disabled when hv-evmcs is enabled," " either enable 'vmx' or disable 'hv-evmcs' along with disabling 'vmx'" making host's features set, *magically* mutable, depending on other user provided features is a bit confusing. One would never know what hv-passthrough actually means, and if enabling/disabling 'random' feature changes it. It's cleaner to do just what user asked (whether implicitly or explicitly) and error out in case it ends up in nonsense configuration. > 'scratch CPUs' idea at this point because it is not going to change > anything at all ('hv_features_on' will stay, custom parsers will stay).g > > Am I missing something? >
Igor Mammedov <imammedo@redhat.com> writes: > On Mon, 22 Feb 2021 11:20:34 +0100 > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: >> >> > Igor Mammedov <imammedo@redhat.com> writes: >> > >> >>> >> >>> We need to distinguish because that would be sane. >> >>> >> >>> Enlightened VMCS is an extension to VMX, it can't be used without >> >>> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, >> >> ... >> >>> That bein said, if >> >>> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, >> >>> there is a problem with explicit enablement: what should >> >>> >> >>> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't >> >>> sound sane to me. >> >> based on above I'd error out is user asks for unsupported option >> >> i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out >> > >> > That's what I keep telling you but you don't seem to listen. 'Scratch >> > CPU' can't possibly help with this use-case because when you parse >> > >> > 'hv-passthrough,hv-evmcs,vmx=off' you >> > >> > 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the >> > host. >> > >> > 2) 'hv-evmcs' -> keep EVMCS bit '1' >> > >> > 3) 'vmx=off' -> you have no idea where EVMCS bit came from. >> > >> > We have to remember which options were aquired from the host and which >> > were set explicitly by the user. >> >> Igor, >> >> could you please comment on the above? In case my line of thought is >> correct, and it is impossible to distinguish between e.g. >> >> 'hv-passthrough,hv-evmcs,-vmx' >> and >> 'hv-passthrough,-vmx' >> >> without a custom parser (written just exactly the way I did in this >> version, for example) regardless of when 'hv-passthrough' is >> expanded. E.g. we have the exact same problem with >> 'hv-default,hv-evmcs,-vmx'. I that case I see no point in discussing > > right, if we need to distinguish between explicit and implicit hv-evmcs set by > hv-passthrough custom parser probably the way to go. > > However do we need actually need to do it? I think we really need that. See below ... > I'd treat 'hv-passthrough,-vmx' the same way as 'hv-passthrough,hv-evmcs,-vmx' > and it applies not only hv-evmcs but other features hv-passthrough might set > (i.e. if whatever was [un]set by hv-passthrough in combination with other > features results in invalid config, QEMU shall error out instead of magically > altering host provided hv-passthrough value). > > something like: > 'hv-passthrough,-vmx' when hv-passthrough makes hv-evmcs bit set > should result in > error_setg(errp,"'vmx' feature can't be disabled when hv-evmcs is enabled," > " either enable 'vmx' or disable 'hv-evmcs' along with disabling 'vmx'" > > making host's features set, *magically* mutable, depending on other user provided features > is a bit confusing. One would never know what hv-passthrough actually means, and if > enabling/disabling 'random' feature changes it. > > It's cleaner to do just what user asked (whether implicitly or explicitly) and error out > in case it ends up in nonsense configuration. > I don't seem to agree this is a sane behavior, especially if you replace 'hv-passthrough' with 'hv-default' above. Removing 'vmx' from CPU for Windows guests is common if you'd want to avoid nested configuration: even without any Hyper-V guests created, Windows itself is a Hyper-V partition. So a sane user will do: '-cpu host,hv-default,vmx=off' and on Intel he will get an error, and on AMD he won't. So what you're suggesting actually defeats the whole purpose of 'hv-default' as upper-layer tools (think libvirt) will need to know that Intel configurations for Windows guests are somewhat different. They'll need to know what 'hv-evmcs' is. We're back to where we've started. If we are to follow this approach let's just throw away 'hv-evmcs' from 'hv-default' set, it's going to be much cleaner. But again, I don't really believe it's the right way to go.
On Tue, 23 Feb 2021 16:46:50 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Igor Mammedov <imammedo@redhat.com> writes: > > > On Mon, 22 Feb 2021 11:20:34 +0100 > > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > > > >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: > >> > >> > Igor Mammedov <imammedo@redhat.com> writes: > >> > > >> >>> > >> >>> We need to distinguish because that would be sane. > >> >>> > >> >>> Enlightened VMCS is an extension to VMX, it can't be used without > >> >>> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, > >> >> ... > >> >>> That bein said, if > >> >>> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, > >> >>> there is a problem with explicit enablement: what should > >> >>> > >> >>> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't > >> >>> sound sane to me. > >> >> based on above I'd error out is user asks for unsupported option > >> >> i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out > >> > > >> > That's what I keep telling you but you don't seem to listen. 'Scratch > >> > CPU' can't possibly help with this use-case because when you parse > >> > > >> > 'hv-passthrough,hv-evmcs,vmx=off' you > >> > > >> > 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the > >> > host. > >> > > >> > 2) 'hv-evmcs' -> keep EVMCS bit '1' > >> > > >> > 3) 'vmx=off' -> you have no idea where EVMCS bit came from. > >> > > >> > We have to remember which options were aquired from the host and which > >> > were set explicitly by the user. > >> > >> Igor, > >> > >> could you please comment on the above? In case my line of thought is > >> correct, and it is impossible to distinguish between e.g. > >> > >> 'hv-passthrough,hv-evmcs,-vmx' > >> and > >> 'hv-passthrough,-vmx' > >> > >> without a custom parser (written just exactly the way I did in this > >> version, for example) regardless of when 'hv-passthrough' is > >> expanded. E.g. we have the exact same problem with > >> 'hv-default,hv-evmcs,-vmx'. I that case I see no point in discussing > > > > right, if we need to distinguish between explicit and implicit hv-evmcs set by > > hv-passthrough custom parser probably the way to go. > > > > However do we need actually need to do it? > > I think we really need that. See below ... > > > I'd treat 'hv-passthrough,-vmx' the same way as 'hv-passthrough,hv-evmcs,-vmx' > > and it applies not only hv-evmcs but other features hv-passthrough might set > > (i.e. if whatever was [un]set by hv-passthrough in combination with other > > features results in invalid config, QEMU shall error out instead of magically > > altering host provided hv-passthrough value). > > > > something like: > > 'hv-passthrough,-vmx' when hv-passthrough makes hv-evmcs bit set > > should result in > > error_setg(errp,"'vmx' feature can't be disabled when hv-evmcs is enabled," > > " either enable 'vmx' or disable 'hv-evmcs' along with disabling 'vmx'" > > > > making host's features set, *magically* mutable, depending on other user provided features > > is a bit confusing. One would never know what hv-passthrough actually means, and if > > enabling/disabling 'random' feature changes it. > > > > It's cleaner to do just what user asked (whether implicitly or explicitly) and error out > > in case it ends up in nonsense configuration. > > > > I don't seem to agree this is a sane behavior, especially if you replace > 'hv-passthrough' with 'hv-default' above. Removing 'vmx' from CPU for > Windows guests is common if you'd want to avoid nested configuration: > even without any Hyper-V guests created, Windows itself is a Hyper-V > partition. > > So a sane user will do: > > '-cpu host,hv-default,vmx=off' > > and on Intel he will get an error, and on AMD he won't. > > So what you're suggesting actually defeats the whole purpose of > 'hv-default' as upper-layer tools (think libvirt) will need to know that I'd assume it would be hard for libvirt to use 'hv-default' from migration point of view. It's semi opaque (one can find out what features it sets indirectly inspecting individual hv_foo features, and mgmt will need to know about them). If it will mutate when other features [un]set, upper layers might need to enumerate all these permutations to know which hosts are compatible or compare host feature sets every time before attempting migration. > Intel configurations for Windows guests are somewhat different. They'll > need to know what 'hv-evmcs' is. We're back to where we've started. we were talking about hv-passthrough, and if host advertises hv-evmcs QEMU should complain if user disabled features it depends on ( not silently fixing up configuration error). But the same applies to hv-default. > If we are to follow this approach let's just throw away 'hv-evmcs' from > 'hv-default' set, it's going to be much cleaner. But again, I don't > really believe it's the right way to go. if desired behavior, on Intel host for above config, to start without error then indeed defaults should not set 'hv-evmcs' if it results in invalid feature set.
Igor Mammedov <imammedo@redhat.com> writes: > On Tue, 23 Feb 2021 16:46:50 +0100 > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> Igor Mammedov <imammedo@redhat.com> writes: >> >> > On Mon, 22 Feb 2021 11:20:34 +0100 >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: >> > >> >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: >> >> >> >> > Igor Mammedov <imammedo@redhat.com> writes: >> >> > >> >> >>> >> >> >>> We need to distinguish because that would be sane. >> >> >>> >> >> >>> Enlightened VMCS is an extension to VMX, it can't be used without >> >> >>> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, >> >> >> ... >> >> >>> That bein said, if >> >> >>> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, >> >> >>> there is a problem with explicit enablement: what should >> >> >>> >> >> >>> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't >> >> >>> sound sane to me. >> >> >> based on above I'd error out is user asks for unsupported option >> >> >> i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out >> >> > >> >> > That's what I keep telling you but you don't seem to listen. 'Scratch >> >> > CPU' can't possibly help with this use-case because when you parse >> >> > >> >> > 'hv-passthrough,hv-evmcs,vmx=off' you >> >> > >> >> > 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the >> >> > host. >> >> > >> >> > 2) 'hv-evmcs' -> keep EVMCS bit '1' >> >> > >> >> > 3) 'vmx=off' -> you have no idea where EVMCS bit came from. >> >> > >> >> > We have to remember which options were aquired from the host and which >> >> > were set explicitly by the user. >> >> >> >> Igor, >> >> >> >> could you please comment on the above? In case my line of thought is >> >> correct, and it is impossible to distinguish between e.g. >> >> >> >> 'hv-passthrough,hv-evmcs,-vmx' >> >> and >> >> 'hv-passthrough,-vmx' >> >> >> >> without a custom parser (written just exactly the way I did in this >> >> version, for example) regardless of when 'hv-passthrough' is >> >> expanded. E.g. we have the exact same problem with >> >> 'hv-default,hv-evmcs,-vmx'. I that case I see no point in discussing >> > >> > right, if we need to distinguish between explicit and implicit hv-evmcs set by >> > hv-passthrough custom parser probably the way to go. >> > >> > However do we need actually need to do it? >> >> I think we really need that. See below ... >> >> > I'd treat 'hv-passthrough,-vmx' the same way as 'hv-passthrough,hv-evmcs,-vmx' >> > and it applies not only hv-evmcs but other features hv-passthrough might set >> > (i.e. if whatever was [un]set by hv-passthrough in combination with other >> > features results in invalid config, QEMU shall error out instead of magically >> > altering host provided hv-passthrough value). >> > >> > something like: >> > 'hv-passthrough,-vmx' when hv-passthrough makes hv-evmcs bit set >> > should result in >> > error_setg(errp,"'vmx' feature can't be disabled when hv-evmcs is enabled," >> > " either enable 'vmx' or disable 'hv-evmcs' along with disabling 'vmx'" >> > >> > making host's features set, *magically* mutable, depending on other user provided features >> > is a bit confusing. One would never know what hv-passthrough actually means, and if >> > enabling/disabling 'random' feature changes it. >> > >> > It's cleaner to do just what user asked (whether implicitly or explicitly) and error out >> > in case it ends up in nonsense configuration. >> > >> >> I don't seem to agree this is a sane behavior, especially if you replace >> 'hv-passthrough' with 'hv-default' above. Removing 'vmx' from CPU for >> Windows guests is common if you'd want to avoid nested configuration: >> even without any Hyper-V guests created, Windows itself is a Hyper-V >> partition. >> >> So a sane user will do: >> >> '-cpu host,hv-default,vmx=off' >> >> and on Intel he will get an error, and on AMD he won't. >> >> So what you're suggesting actually defeats the whole purpose of >> 'hv-default' as upper-layer tools (think libvirt) will need to know that > I'd assume it would be hard for libvirt to use 'hv-default' from migration > point of view. It's semi opaque (one can find out what features it sets > indirectly inspecting individual hv_foo features, and mgmt will need to > know about them). If it will mutate when other features [un]set, upper > layers might need to enumerate all these permutations to know which hosts > are compatible or compare host feature sets every time before attempting > migration. > That's exactly the opposite of what's the goal here which is: make it possible for upper layers to not know anything about Hyper-V enlightenments besides 'hv-default'. Migration should work just fine, if the rest of guest configuration matches -- then 'hv-default' will create the exact same things (e.g. if 'vmx' was disabled on the source it has to be enabled on the destination, it can't be different) >> Intel configurations for Windows guests are somewhat different. They'll >> need to know what 'hv-evmcs' is. We're back to where we've started. > > we were talking about hv-passthrough, and if host advertises hv-evmcs > QEMU should complain if user disabled features it depends on ( > not silently fixing up configuration error). > But the same applies to hv-default. Let's forget about hv-passthrough completely for a while as this series is kind of unrelated to it. In the previous submission I was setting 'hv-default' based on host availability of the feature only. That is: set on Intel, unset on AMD. We have to at least preserve that because it would be insane to crash on -cpu host,hv-default on AMD because AMD doesn't (and never will!) support hv-evmcs, right? > >> If we are to follow this approach let's just throw away 'hv-evmcs' from >> 'hv-default' set, it's going to be much cleaner. But again, I don't >> really believe it's the right way to go. > > if desired behavior, on Intel host for above config, to start without error > then indeed defaults should not set 'hv-evmcs' if it results in invalid > feature set. This is problematic as it is still sane for everyone to enable it as it gives performance advantage. If we just for a second forget about custom parsers and all that -- which is just an implementation detail, why can't we tie 'hv-evmcs' bit in 'hv-default' to 'vxm' 1:1? Again, the end goal is: make it possible for upper layers to now know anything about Hyper-V enlightenments other than 'hv-default'. Technically, it is possible to make it work.
On Tue, 23 Feb 2021 19:08:42 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Igor Mammedov <imammedo@redhat.com> writes: > > > On Tue, 23 Feb 2021 16:46:50 +0100 > > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > > > >> Igor Mammedov <imammedo@redhat.com> writes: > >> > >> > On Mon, 22 Feb 2021 11:20:34 +0100 > >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> > > >> >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: > >> >> > >> >> > Igor Mammedov <imammedo@redhat.com> writes: > >> >> > > >> >> >>> > >> >> >>> We need to distinguish because that would be sane. > >> >> >>> > >> >> >>> Enlightened VMCS is an extension to VMX, it can't be used without > >> >> >>> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, > >> >> >> ... > >> >> >>> That bein said, if > >> >> >>> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, > >> >> >>> there is a problem with explicit enablement: what should > >> >> >>> > >> >> >>> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't > >> >> >>> sound sane to me. > >> >> >> based on above I'd error out is user asks for unsupported option > >> >> >> i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out > >> >> > > >> >> > That's what I keep telling you but you don't seem to listen. 'Scratch > >> >> > CPU' can't possibly help with this use-case because when you parse > >> >> > > >> >> > 'hv-passthrough,hv-evmcs,vmx=off' you > >> >> > > >> >> > 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the > >> >> > host. > >> >> > > >> >> > 2) 'hv-evmcs' -> keep EVMCS bit '1' > >> >> > > >> >> > 3) 'vmx=off' -> you have no idea where EVMCS bit came from. > >> >> > > >> >> > We have to remember which options were aquired from the host and which > >> >> > were set explicitly by the user. > >> >> > >> >> Igor, > >> >> > >> >> could you please comment on the above? In case my line of thought is > >> >> correct, and it is impossible to distinguish between e.g. > >> >> > >> >> 'hv-passthrough,hv-evmcs,-vmx' > >> >> and > >> >> 'hv-passthrough,-vmx' > >> >> > >> >> without a custom parser (written just exactly the way I did in this > >> >> version, for example) regardless of when 'hv-passthrough' is > >> >> expanded. E.g. we have the exact same problem with > >> >> 'hv-default,hv-evmcs,-vmx'. I that case I see no point in discussing > >> > > >> > right, if we need to distinguish between explicit and implicit hv-evmcs set by > >> > hv-passthrough custom parser probably the way to go. > >> > > >> > However do we need actually need to do it? > >> > >> I think we really need that. See below ... > >> > >> > I'd treat 'hv-passthrough,-vmx' the same way as 'hv-passthrough,hv-evmcs,-vmx' > >> > and it applies not only hv-evmcs but other features hv-passthrough might set > >> > (i.e. if whatever was [un]set by hv-passthrough in combination with other > >> > features results in invalid config, QEMU shall error out instead of magically > >> > altering host provided hv-passthrough value). > >> > > >> > something like: > >> > 'hv-passthrough,-vmx' when hv-passthrough makes hv-evmcs bit set > >> > should result in > >> > error_setg(errp,"'vmx' feature can't be disabled when hv-evmcs is enabled," > >> > " either enable 'vmx' or disable 'hv-evmcs' along with disabling 'vmx'" > >> > > >> > making host's features set, *magically* mutable, depending on other user provided features > >> > is a bit confusing. One would never know what hv-passthrough actually means, and if > >> > enabling/disabling 'random' feature changes it. > >> > > >> > It's cleaner to do just what user asked (whether implicitly or explicitly) and error out > >> > in case it ends up in nonsense configuration. > >> > > >> > >> I don't seem to agree this is a sane behavior, especially if you replace > >> 'hv-passthrough' with 'hv-default' above. Removing 'vmx' from CPU for > >> Windows guests is common if you'd want to avoid nested configuration: > >> even without any Hyper-V guests created, Windows itself is a Hyper-V > >> partition. > >> > >> So a sane user will do: > >> > >> '-cpu host,hv-default,vmx=off' > >> > >> and on Intel he will get an error, and on AMD he won't. > >> > >> So what you're suggesting actually defeats the whole purpose of > >> 'hv-default' as upper-layer tools (think libvirt) will need to know that > > I'd assume it would be hard for libvirt to use 'hv-default' from migration > > point of view. It's semi opaque (one can find out what features it sets > > indirectly inspecting individual hv_foo features, and mgmt will need to > > know about them). If it will mutate when other features [un]set, upper > > layers might need to enumerate all these permutations to know which hosts > > are compatible or compare host feature sets every time before attempting > > migration. > > That's exactly the opposite of what's the goal here which is: make it > possible for upper layers to not know anything about Hyper-V > enlightenments besides 'hv-default'. Migration should work just fine, if > the rest of guest configuration matches -- then 'hv-default' will create > the exact same things (e.g. if 'vmx' was disabled on the source it has ^^^^^ I'm not convinced in that yet (not with current impl. more on that at the end of reply) > to be enabled on the destination, it can't be different) > > > >> Intel configurations for Windows guests are somewhat different. They'll > >> need to know what 'hv-evmcs' is. We're back to where we've started. > > > > we were talking about hv-passthrough, and if host advertises hv-evmcs > > QEMU should complain if user disabled features it depends on ( > > not silently fixing up configuration error). > > But the same applies to hv-default. > > Let's forget about hv-passthrough completely for a while as this series > is kind of unrelated to it. It adds a lot for unrelated code (not just couple of lines), I've played with scratch CPU idea, here is demo of it https://github.com/imammedo/qemu/commit/a4b107d5368ebf72d45082bc8310a6b88a4ba6fb I didn't rework caps/cpuid querying parts (just hacked around it), and even without that it saves us ~200LOC (not a small part of which comes with this series). I also split horrible hv_cpuid_check_and_set into separate 'set' and 'check' stages. Granted it was sort-of pre-existing ugly code, some of your re-factoring made it a bit better but it's still far from readable. > In the previous submission I was setting 'hv-default' based on host > availability of the feature only. That is: set on Intel, unset on > AMD. We have to at least preserve that because it would be insane to > crash on > > -cpu host,hv-default > > on AMD because AMD doesn't (and never will!) support hv-evmcs, right? If QEMU prevents cross arch migration i.e. it's not supported, then I guess we can make hv-default different depending on AMD or Intel host. If not then we might need to be conservative i.e. exclude hv-evmcs from defaults. > >> If we are to follow this approach let's just throw away 'hv-evmcs' from > >> 'hv-default' set, it's going to be much cleaner. But again, I don't > >> really believe it's the right way to go. > > > > if desired behavior, on Intel host for above config, to start without error > > then indeed defaults should not set 'hv-evmcs' if it results in invalid > > feature set. > > This is problematic as it is still sane for everyone to enable it as it > gives performance advantage. If we just for a second forget about custom " > >> So a sane user will do: > >> > >> '-cpu host,hv-default,vmx=off' " it's not easy picking defaults. > parsers and all that -- which is just an implementation detail, why can't > we tie 'hv-evmcs' bit in 'hv-default' to 'vxm' 1:1? migration wise I don't see issues wrt vmx=off turning of hv-evmcs, however ... we were replacing user input fixups with hard errors asking user to fix CLI and removing custom parsers in favor of generic ones. In vmx=off case we would be fixing up what 'hv-default' explicitly set. Same applies to other hv-foo set by hv-default. ex: 'hv-default,hv-dep1=off', will turn off some dependent feature for other hv feature in hv-default set and it will error out, same goes on for enabling feature that has dependencies. Why should we treat hv-evmcs/vmx pair any different? Granted exiting with error is not the best UX, but at least it says to user what's wrong with CLI and how to fix it. Also it lets to keep QEMU code manageable and with consistent behavior. > Again, the end goal is: make it possible for upper layers to now know > anything about Hyper-V enlightenments other than 'hv-default'. I'm still doubtful about feasibility of this goal when migration is considered. It sure would work if hosts are identical hw/sw wise. In mixed setup all features, except of hv-evmcs, that included in 'hv-default', will error out in case host doesn't support it, which should prevent incompatible migration so that's also fine. But hv-evmcs will silently go away if host doesn't support it, which is issue when migration happens to/from host that supports it. Maybe to help mgmt to figure out hosts compatibility 1. it should know about hv-evmcs to query it's status 2. or default value set by 'hv-default' should be exposed to mgmt so it could compare whole feature-set in one go without being aware of individual features. Additionally on QEMU side for such conditional features we can theoretically add a subsection to migration stream when feature is enabled, that way we at least can prevent 'successful' migration, when destination value doesn't match. But this might already be over-engineering on my part. > Technically, it is possible to make it work.
Igor Mammedov <imammedo@redhat.com> writes: > On Tue, 23 Feb 2021 19:08:42 +0100 > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> Igor Mammedov <imammedo@redhat.com> writes: >> >> > On Tue, 23 Feb 2021 16:46:50 +0100 >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: >> > >> >> Igor Mammedov <imammedo@redhat.com> writes: >> >> >> >> > On Mon, 22 Feb 2021 11:20:34 +0100 >> >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: >> >> > >> >> >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: >> >> >> >> >> >> > Igor Mammedov <imammedo@redhat.com> writes: >> >> >> > >> >> >> >>> >> >> >> >>> We need to distinguish because that would be sane. >> >> >> >>> >> >> >> >>> Enlightened VMCS is an extension to VMX, it can't be used without >> >> >> >>> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, >> >> >> >> ... >> >> >> >>> That bein said, if >> >> >> >>> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, >> >> >> >>> there is a problem with explicit enablement: what should >> >> >> >>> >> >> >> >>> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't >> >> >> >>> sound sane to me. >> >> >> >> based on above I'd error out is user asks for unsupported option >> >> >> >> i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out >> >> >> > >> >> >> > That's what I keep telling you but you don't seem to listen. 'Scratch >> >> >> > CPU' can't possibly help with this use-case because when you parse >> >> >> > >> >> >> > 'hv-passthrough,hv-evmcs,vmx=off' you >> >> >> > >> >> >> > 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the >> >> >> > host. >> >> >> > >> >> >> > 2) 'hv-evmcs' -> keep EVMCS bit '1' >> >> >> > >> >> >> > 3) 'vmx=off' -> you have no idea where EVMCS bit came from. >> >> >> > >> >> >> > We have to remember which options were aquired from the host and which >> >> >> > were set explicitly by the user. >> >> >> >> >> >> Igor, >> >> >> >> >> >> could you please comment on the above? In case my line of thought is >> >> >> correct, and it is impossible to distinguish between e.g. >> >> >> >> >> >> 'hv-passthrough,hv-evmcs,-vmx' >> >> >> and >> >> >> 'hv-passthrough,-vmx' >> >> >> >> >> >> without a custom parser (written just exactly the way I did in this >> >> >> version, for example) regardless of when 'hv-passthrough' is >> >> >> expanded. E.g. we have the exact same problem with >> >> >> 'hv-default,hv-evmcs,-vmx'. I that case I see no point in discussing >> >> > >> >> > right, if we need to distinguish between explicit and implicit hv-evmcs set by >> >> > hv-passthrough custom parser probably the way to go. >> >> > >> >> > However do we need actually need to do it? >> >> >> >> I think we really need that. See below ... >> >> >> >> > I'd treat 'hv-passthrough,-vmx' the same way as 'hv-passthrough,hv-evmcs,-vmx' >> >> > and it applies not only hv-evmcs but other features hv-passthrough might set >> >> > (i.e. if whatever was [un]set by hv-passthrough in combination with other >> >> > features results in invalid config, QEMU shall error out instead of magically >> >> > altering host provided hv-passthrough value). >> >> > >> >> > something like: >> >> > 'hv-passthrough,-vmx' when hv-passthrough makes hv-evmcs bit set >> >> > should result in >> >> > error_setg(errp,"'vmx' feature can't be disabled when hv-evmcs is enabled," >> >> > " either enable 'vmx' or disable 'hv-evmcs' along with disabling 'vmx'" >> >> > >> >> > making host's features set, *magically* mutable, depending on other user provided features >> >> > is a bit confusing. One would never know what hv-passthrough actually means, and if >> >> > enabling/disabling 'random' feature changes it. >> >> > >> >> > It's cleaner to do just what user asked (whether implicitly or explicitly) and error out >> >> > in case it ends up in nonsense configuration. >> >> > >> >> >> >> I don't seem to agree this is a sane behavior, especially if you replace >> >> 'hv-passthrough' with 'hv-default' above. Removing 'vmx' from CPU for >> >> Windows guests is common if you'd want to avoid nested configuration: >> >> even without any Hyper-V guests created, Windows itself is a Hyper-V >> >> partition. >> >> >> >> So a sane user will do: >> >> >> >> '-cpu host,hv-default,vmx=off' >> >> >> >> and on Intel he will get an error, and on AMD he won't. >> >> >> >> So what you're suggesting actually defeats the whole purpose of >> >> 'hv-default' as upper-layer tools (think libvirt) will need to know that >> > I'd assume it would be hard for libvirt to use 'hv-default' from migration >> > point of view. It's semi opaque (one can find out what features it sets >> > indirectly inspecting individual hv_foo features, and mgmt will need to >> > know about them). If it will mutate when other features [un]set, upper >> > layers might need to enumerate all these permutations to know which hosts >> > are compatible or compare host feature sets every time before attempting >> > migration. >> >> That's exactly the opposite of what's the goal here which is: make it >> possible for upper layers to not know anything about Hyper-V >> enlightenments besides 'hv-default'. Migration should work just fine, if >> the rest of guest configuration matches -- then 'hv-default' will create >> the exact same things (e.g. if 'vmx' was disabled on the source it has > ^^^^^ > I'm not convinced in that yet (not with current impl. more on that at the end of reply) > >> to be enabled on the destination, it can't be different) >> >> >> >> Intel configurations for Windows guests are somewhat different. They'll >> >> need to know what 'hv-evmcs' is. We're back to where we've started. >> > >> > we were talking about hv-passthrough, and if host advertises hv-evmcs >> > QEMU should complain if user disabled features it depends on ( >> > not silently fixing up configuration error). >> > But the same applies to hv-default. >> >> Let's forget about hv-passthrough completely for a while as this series >> is kind of unrelated to it. > > It adds a lot for unrelated code (not just couple of lines), > I've played with scratch CPU idea, here is demo of it > https://github.com/imammedo/qemu/commit/a4b107d5368ebf72d45082bc8310a6b88a4ba6fb > I didn't rework caps/cpuid querying parts (just hacked around it), > and even without that it saves us ~200LOC (not a small part of which comes > with this series). All your savings come from throwing away custom parsers -- which are not needed at all if we don't distinguish between 'hv-default,hv-evmcs' and 'hv-default' it's just not needed, don't count these patches in. Or, if it is needed, please explain how your scratch CPU is making things different. I guess it is not so we can discuss this outside of this series. > I also split horrible hv_cpuid_check_and_set into separate 'set' and 'check' stages. > Granted it was sort-of pre-existing ugly code, some of your > re-factoring made it a bit better but it's still far from readable. > hv_cpuid_check_and_set() is already there, I'm not at all opposed to making this code even better but I don't see it as a must for this particular feature (hv-default). >> In the previous submission I was setting 'hv-default' based on host >> availability of the feature only. That is: set on Intel, unset on >> AMD. We have to at least preserve that because it would be insane to >> crash on >> >> -cpu host,hv-default >> >> on AMD because AMD doesn't (and never will!) support hv-evmcs, right? > > If QEMU prevents cross arch migration i.e. it's not supported, > then I guess we can make hv-default different depending on AMD or Intel host. > If not then we might need to be conservative i.e. exclude hv-evmcs from defaults. > Forget about cross vendor. I want to tie it 1:1 to VMX feature on the guest CPU -- which happens to be only available on Intel. It is absolutely impossible to migrate VMX enabled guest to VMX-disabled destination, with or without evmcs. >> >> If we are to follow this approach let's just throw away 'hv-evmcs' from >> >> 'hv-default' set, it's going to be much cleaner. But again, I don't >> >> really believe it's the right way to go. >> > >> > if desired behavior, on Intel host for above config, to start without error >> > then indeed defaults should not set 'hv-evmcs' if it results in invalid >> > feature set. >> >> This is problematic as it is still sane for everyone to enable it as it >> gives performance advantage. If we just for a second forget about custom > " > > >> So a sane user will do: > > >> > > >> '-cpu host,hv-default,vmx=off' > " > it's not easy picking defaults. > >> parsers and all that -- which is just an implementation detail, why can't >> we tie 'hv-evmcs' bit in 'hv-default' to 'vxm' 1:1? > migration wise I don't see issues wrt vmx=off turning of hv-evmcs, > however ... > > we were replacing user input fixups with hard errors asking > user to fix CLI and removing custom parsers in favor of generic ones. > > In vmx=off case we would be fixing up what 'hv-default' explicitly set. > Same applies to other hv-foo set by hv-default. > > ex: 'hv-default,hv-dep1=off', will turn off some dependent feature > for other hv feature in hv-default set and it will error out, > same goes on for enabling feature that has dependencies. > Why should we treat hv-evmcs/vmx pair any different? > VMX is not part of Hyper-V enlightenments, is it? It can also be coming from a CPU model: "-cpu MyLovelyModelWithVmx,hv-default" should not throw an error! Again, the goal is for userspace to not know anything besides 'hv-default' for Hyper-V enlightenments. > Granted exiting with error is not the best UX, but at least it says to user > what's wrong with CLI and how to fix it. Also it lets to keep QEMU code > manageable and with consistent behavior. Enabling EVMCS only when guest CPU has VMX is a smart behavior, all users want that. It is very consistent with how genuine Hyper-V behaves. > >> Again, the end goal is: make it possible for upper layers to now know >> anything about Hyper-V enlightenments other than 'hv-default'. > I'm still doubtful about feasibility of this goal when migration is considered. > It sure would work if hosts are identical hw/sw wise. > In mixed setup all features, except of hv-evmcs, that included in 'hv-default', > will error out in case host doesn't support it, which should prevent > incompatible migration so that's also fine. > > But hv-evmcs will silently go away if host doesn't support it, > which is issue when migration happens to/from host that supports it. But how can it go away? In KVM, hv-evmcs support is not conditional, basically, all KVMs which support netsting state migration also support EVMCS. > > Maybe to help mgmt to figure out hosts compatibility > 1. it should know about hv-evmcs to query it's status > 2. or default value set by 'hv-default' should be exposed to mgmt > so it could compare whole feature-set in one go without being > aware of individual features. > > Additionally on QEMU side for such conditional features we can > theoretically add a subsection to migration stream when feature > is enabled, that way we at least can prevent 'successful' migration, > when destination value doesn't match. But this might already be > over-engineering on my part. I think you're trying to solve an issue which doesn't exist. In case we're successfully migrating a nested enabled guest, our KVM is modern enough and supports EVMCS (on Intel, of course). Also, nested state (which is part of the migration stream) has EVMCS flag, we can't migrate somewhere where the flag is unsupported. Anyway, I feel we're walking in circles. I'm ready to just drop all EVMCS related bits from this seies to get it merged. This will make the part which you hate the most ("custom parsers") go away too. We can discuss EVMCS to death after that and, when we finally decide that user convenience is actually worth something, we can add 'hv-evmcs' to the new Eduardo, do you see any way forward here?
On Wed, 24 Feb 2021 18:00:43 +0100 Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > Igor Mammedov <imammedo@redhat.com> writes: > > > On Tue, 23 Feb 2021 19:08:42 +0100 > > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > > > >> Igor Mammedov <imammedo@redhat.com> writes: > >> > >> > On Tue, 23 Feb 2021 16:46:50 +0100 > >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> > > >> >> Igor Mammedov <imammedo@redhat.com> writes: > >> >> > >> >> > On Mon, 22 Feb 2021 11:20:34 +0100 > >> >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> >> > > >> >> >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: > >> >> >> > >> >> >> > Igor Mammedov <imammedo@redhat.com> writes: > >> >> >> > > >> >> >> >>> > >> >> >> >>> We need to distinguish because that would be sane. > >> >> >> >>> > >> >> >> >>> Enlightened VMCS is an extension to VMX, it can't be used without > >> >> >> >>> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, > >> >> >> >> ... > >> >> >> >>> That bein said, if > >> >> >> >>> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, > >> >> >> >>> there is a problem with explicit enablement: what should > >> >> >> >>> > >> >> >> >>> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't > >> >> >> >>> sound sane to me. > >> >> >> >> based on above I'd error out is user asks for unsupported option > >> >> >> >> i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out > >> >> >> > > >> >> >> > That's what I keep telling you but you don't seem to listen. 'Scratch > >> >> >> > CPU' can't possibly help with this use-case because when you parse > >> >> >> > > >> >> >> > 'hv-passthrough,hv-evmcs,vmx=off' you > >> >> >> > > >> >> >> > 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the > >> >> >> > host. > >> >> >> > > >> >> >> > 2) 'hv-evmcs' -> keep EVMCS bit '1' > >> >> >> > > >> >> >> > 3) 'vmx=off' -> you have no idea where EVMCS bit came from. > >> >> >> > > >> >> >> > We have to remember which options were aquired from the host and which > >> >> >> > were set explicitly by the user. > >> >> >> > >> >> >> Igor, > >> >> >> > >> >> >> could you please comment on the above? In case my line of thought is > >> >> >> correct, and it is impossible to distinguish between e.g. > >> >> >> > >> >> >> 'hv-passthrough,hv-evmcs,-vmx' > >> >> >> and > >> >> >> 'hv-passthrough,-vmx' > >> >> >> > >> >> >> without a custom parser (written just exactly the way I did in this > >> >> >> version, for example) regardless of when 'hv-passthrough' is > >> >> >> expanded. E.g. we have the exact same problem with > >> >> >> 'hv-default,hv-evmcs,-vmx'. I that case I see no point in discussing > >> >> > > >> >> > right, if we need to distinguish between explicit and implicit hv-evmcs set by > >> >> > hv-passthrough custom parser probably the way to go. > >> >> > > >> >> > However do we need actually need to do it? > >> >> > >> >> I think we really need that. See below ... > >> >> > >> >> > I'd treat 'hv-passthrough,-vmx' the same way as 'hv-passthrough,hv-evmcs,-vmx' > >> >> > and it applies not only hv-evmcs but other features hv-passthrough might set > >> >> > (i.e. if whatever was [un]set by hv-passthrough in combination with other > >> >> > features results in invalid config, QEMU shall error out instead of magically > >> >> > altering host provided hv-passthrough value). > >> >> > > >> >> > something like: > >> >> > 'hv-passthrough,-vmx' when hv-passthrough makes hv-evmcs bit set > >> >> > should result in > >> >> > error_setg(errp,"'vmx' feature can't be disabled when hv-evmcs is enabled," > >> >> > " either enable 'vmx' or disable 'hv-evmcs' along with disabling 'vmx'" > >> >> > > >> >> > making host's features set, *magically* mutable, depending on other user provided features > >> >> > is a bit confusing. One would never know what hv-passthrough actually means, and if > >> >> > enabling/disabling 'random' feature changes it. > >> >> > > >> >> > It's cleaner to do just what user asked (whether implicitly or explicitly) and error out > >> >> > in case it ends up in nonsense configuration. > >> >> > > >> >> > >> >> I don't seem to agree this is a sane behavior, especially if you replace > >> >> 'hv-passthrough' with 'hv-default' above. Removing 'vmx' from CPU for > >> >> Windows guests is common if you'd want to avoid nested configuration: > >> >> even without any Hyper-V guests created, Windows itself is a Hyper-V > >> >> partition. > >> >> > >> >> So a sane user will do: > >> >> > >> >> '-cpu host,hv-default,vmx=off' > >> >> > >> >> and on Intel he will get an error, and on AMD he won't. > >> >> > >> >> So what you're suggesting actually defeats the whole purpose of > >> >> 'hv-default' as upper-layer tools (think libvirt) will need to know that > >> > I'd assume it would be hard for libvirt to use 'hv-default' from migration > >> > point of view. It's semi opaque (one can find out what features it sets > >> > indirectly inspecting individual hv_foo features, and mgmt will need to > >> > know about them). If it will mutate when other features [un]set, upper > >> > layers might need to enumerate all these permutations to know which hosts > >> > are compatible or compare host feature sets every time before attempting > >> > migration. > >> > >> That's exactly the opposite of what's the goal here which is: make it > >> possible for upper layers to not know anything about Hyper-V > >> enlightenments besides 'hv-default'. Migration should work just fine, if > >> the rest of guest configuration matches -- then 'hv-default' will create > >> the exact same things (e.g. if 'vmx' was disabled on the source it has > > ^^^^^ > > I'm not convinced in that yet (not with current impl. more on that at the end of reply) > > > >> to be enabled on the destination, it can't be different) > >> > >> > >> >> Intel configurations for Windows guests are somewhat different. They'll > >> >> need to know what 'hv-evmcs' is. We're back to where we've started. > >> > > >> > we were talking about hv-passthrough, and if host advertises hv-evmcs > >> > QEMU should complain if user disabled features it depends on ( > >> > not silently fixing up configuration error). > >> > But the same applies to hv-default. > >> > >> Let's forget about hv-passthrough completely for a while as this series > >> is kind of unrelated to it. > > > > It adds a lot for unrelated code (not just couple of lines), > > I've played with scratch CPU idea, here is demo of it > > https://github.com/imammedo/qemu/commit/a4b107d5368ebf72d45082bc8310a6b88a4ba6fb > > I didn't rework caps/cpuid querying parts (just hacked around it), > > and even without that it saves us ~200LOC (not a small part of which comes > > with this series). > > All your savings come from throwing away custom parsers -- which are not > needed at all if we don't distinguish between > > 'hv-default,hv-evmcs' and 'hv-default' > > it's just not needed, don't count these patches in. Or, if it is needed, > please explain how your scratch CPU is making things different. I guess > it is not so we can discuss this outside of this series. scratch CPU helps with hostpassthrough refactoring which also brings in dependency on custom parser. > > I also split horrible hv_cpuid_check_and_set into separate 'set' and 'check' stages. > > Granted it was sort-of pre-existing ugly code, some of your > > re-factoring made it a bit better but it's still far from readable. > > > > hv_cpuid_check_and_set() is already there, I'm not at all opposed to > making this code even better but I don't see it as a must for this > particular feature (hv-default). it's not a must have for hv-default especially if you don't touch it. (however if you touch it & co, I'd ask to clean it up first) > >> In the previous submission I was setting 'hv-default' based on host > >> availability of the feature only. That is: set on Intel, unset on > >> AMD. We have to at least preserve that because it would be insane to > >> crash on > >> > >> -cpu host,hv-default > >> > >> on AMD because AMD doesn't (and never will!) support hv-evmcs, right? > > > > If QEMU prevents cross arch migration i.e. it's not supported, > > then I guess we can make hv-default different depending on AMD or Intel host. > > If not then we might need to be conservative i.e. exclude hv-evmcs from defaults. > > > > Forget about cross vendor. I want to tie it 1:1 to VMX feature on the > guest CPU -- which happens to be only available on Intel. It is > absolutely impossible to migrate VMX enabled guest to VMX-disabled > destination, with or without evmcs. Ok > >> >> If we are to follow this approach let's just throw away 'hv-evmcs' from > >> >> 'hv-default' set, it's going to be much cleaner. But again, I don't > >> >> really believe it's the right way to go. > >> > > >> > if desired behavior, on Intel host for above config, to start without error > >> > then indeed defaults should not set 'hv-evmcs' if it results in invalid > >> > feature set. > >> > >> This is problematic as it is still sane for everyone to enable it as it > >> gives performance advantage. If we just for a second forget about custom > > " > > > >> So a sane user will do: > > > >> > > > >> '-cpu host,hv-default,vmx=off' > > " > > it's not easy picking defaults. > > > >> parsers and all that -- which is just an implementation detail, why can't > >> we tie 'hv-evmcs' bit in 'hv-default' to 'vxm' 1:1? > > migration wise I don't see issues wrt vmx=off turning of hv-evmcs, > > however ... > > > > we were replacing user input fixups with hard errors asking > > user to fix CLI and removing custom parsers in favor of generic ones. > > > > In vmx=off case we would be fixing up what 'hv-default' explicitly set. > > Same applies to other hv-foo set by hv-default. > > > > ex: 'hv-default,hv-dep1=off', will turn off some dependent feature > > for other hv feature in hv-default set and it will error out, > > same goes on for enabling feature that has dependencies. > > Why should we treat hv-evmcs/vmx pair any different? > > > > VMX is not part of Hyper-V enlightenments, is it? It can also be coming > from a CPU model: > > "-cpu MyLovelyModelWithVmx,hv-default" > > should not throw an error! > > Again, the goal is for userspace to not know anything besides > 'hv-default' for Hyper-V enlightenments. > > > > Granted exiting with error is not the best UX, but at least it says to user > > what's wrong with CLI and how to fix it. Also it lets to keep QEMU code > > manageable and with consistent behavior. > > Enabling EVMCS only when guest CPU has VMX is a smart behavior, all > users want that. It is very consistent with how genuine Hyper-V behaves. it's all good, unless VMX is absent for whatever reasons (cpumodel or vmx=off on CLI), in this case just error out and say what's wrong instead of trying to fix CLI up. > >> Again, the end goal is: make it possible for upper layers to now know > >> anything about Hyper-V enlightenments other than 'hv-default'. > > I'm still doubtful about feasibility of this goal when migration is considered. > > It sure would work if hosts are identical hw/sw wise. > > In mixed setup all features, except of hv-evmcs, that included in 'hv-default', > > will error out in case host doesn't support it, which should prevent > > incompatible migration so that's also fine. > > > > But hv-evmcs will silently go away if host doesn't support it, > > which is issue when migration happens to/from host that supports it. > > But how can it go away? In KVM, hv-evmcs support is not conditional, > basically, all KVMs which support netsting state migration also support > EVMCS. you added that in kernel between 4.19-4.20 (8cab6507f64), so on Intel host flag can change depending on not so old kernel version. Unless QEMU minimum supported kernel version is 4.20, we can't ignore that. (downstreams will have to take care of it on its own) > > > > Maybe to help mgmt to figure out hosts compatibility > > 1. it should know about hv-evmcs to query it's status > > 2. or default value set by 'hv-default' should be exposed to mgmt > > so it could compare whole feature-set in one go without being > > aware of individual features. > > > > Additionally on QEMU side for such conditional features we can > > theoretically add a subsection to migration stream when feature > > is enabled, that way we at least can prevent 'successful' migration, > > when destination value doesn't match. But this might already be > > over-engineering on my part. > > I think you're trying to solve an issue which doesn't exist. In case > we're successfully migrating a nested enabled guest, our KVM is modern > enough and supports EVMCS (on Intel, of course). Also, nested state > (which is part of the migration stream) has EVMCS flag, we can't migrate > somewhere where the flag is unsupported. that's what I was missing. Can you point out to the code that makes sure that migration fails? (a comment where hv-evcms is added to default set explaining why it's safe, pls) > Anyway, I feel we're walking in circles. I'm ready to just drop all > EVMCS related bits from this seies to get it merged. This will make the > part which you hate the most ("custom parsers") go away too. We can discuss > EVMCS to death after that and, when we finally decide that user > convenience is actually worth something, we can add 'hv-evmcs' to the > new fine with me, it would be even easier to review if it were just a patch that would add 'hv-defaults', without non must have refactoring. (cleanup/refactoring could be another series) If we can guarantee that hv-evcms won't flip on/off on all supported kernels, I'm also fine with keeping it in default set and error-ing out if vmx ends up in off state. However if it changes, we need to expose 'default set' to mgmt somehow, so it will know that hosts aren't compatible (instead of finding out it hard way in form of failed migration (assuming it fails)) > Eduardo, do you see any way forward here? >
Igor Mammedov <imammedo@redhat.com> writes: > On Wed, 24 Feb 2021 18:00:43 +0100 > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: > >> Igor Mammedov <imammedo@redhat.com> writes: >> >> > On Tue, 23 Feb 2021 19:08:42 +0100 >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: >> > >> >> Igor Mammedov <imammedo@redhat.com> writes: >> >> >> >> > On Tue, 23 Feb 2021 16:46:50 +0100 >> >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: >> >> > >> >> >> Igor Mammedov <imammedo@redhat.com> writes: >> >> >> >> >> >> > On Mon, 22 Feb 2021 11:20:34 +0100 >> >> >> > Vitaly Kuznetsov <vkuznets@redhat.com> wrote: >> >> >> > >> >> >> >> Vitaly Kuznetsov <vkuznets@redhat.com> writes: >> >> >> >> >> >> >> >> > Igor Mammedov <imammedo@redhat.com> writes: >> >> >> >> > >> >> >> >> >>> >> >> >> >> >>> We need to distinguish because that would be sane. >> >> >> >> >>> >> >> >> >> >>> Enlightened VMCS is an extension to VMX, it can't be used without >> >> >> >> >>> it. Genuine Hyper-V doesn't have a knob for enabling and disabling it, >> >> >> >> >> ... >> >> >> >> >>> That bein said, if >> >> >> >> >>> guest CPU lacks VMX it is counter-productive to expose EVMCS. However, >> >> >> >> >>> there is a problem with explicit enablement: what should >> >> >> >> >>> >> >> >> >> >>> 'hv-passthrough,hv-evmcs' option do? Just silently drop EVMCS? Doesn't >> >> >> >> >>> sound sane to me. >> >> >> >> >> based on above I'd error out is user asks for unsupported option >> >> >> >> >> i.e. no VMX -> no hv-evmcs - if explicitly asked -> error out >> >> >> >> > >> >> >> >> > That's what I keep telling you but you don't seem to listen. 'Scratch >> >> >> >> > CPU' can't possibly help with this use-case because when you parse >> >> >> >> > >> >> >> >> > 'hv-passthrough,hv-evmcs,vmx=off' you >> >> >> >> > >> >> >> >> > 1) "hv-passthrough" -> set EVMCS bit to '1' as it is supported by the >> >> >> >> > host. >> >> >> >> > >> >> >> >> > 2) 'hv-evmcs' -> keep EVMCS bit '1' >> >> >> >> > >> >> >> >> > 3) 'vmx=off' -> you have no idea where EVMCS bit came from. >> >> >> >> > >> >> >> >> > We have to remember which options were aquired from the host and which >> >> >> >> > were set explicitly by the user. >> >> >> >> >> >> >> >> Igor, >> >> >> >> >> >> >> >> could you please comment on the above? In case my line of thought is >> >> >> >> correct, and it is impossible to distinguish between e.g. >> >> >> >> >> >> >> >> 'hv-passthrough,hv-evmcs,-vmx' >> >> >> >> and >> >> >> >> 'hv-passthrough,-vmx' >> >> >> >> >> >> >> >> without a custom parser (written just exactly the way I did in this >> >> >> >> version, for example) regardless of when 'hv-passthrough' is >> >> >> >> expanded. E.g. we have the exact same problem with >> >> >> >> 'hv-default,hv-evmcs,-vmx'. I that case I see no point in discussing >> >> >> > >> >> >> > right, if we need to distinguish between explicit and implicit hv-evmcs set by >> >> >> > hv-passthrough custom parser probably the way to go. >> >> >> > >> >> >> > However do we need actually need to do it? >> >> >> >> >> >> I think we really need that. See below ... >> >> >> >> >> >> > I'd treat 'hv-passthrough,-vmx' the same way as 'hv-passthrough,hv-evmcs,-vmx' >> >> >> > and it applies not only hv-evmcs but other features hv-passthrough might set >> >> >> > (i.e. if whatever was [un]set by hv-passthrough in combination with other >> >> >> > features results in invalid config, QEMU shall error out instead of magically >> >> >> > altering host provided hv-passthrough value). >> >> >> > >> >> >> > something like: >> >> >> > 'hv-passthrough,-vmx' when hv-passthrough makes hv-evmcs bit set >> >> >> > should result in >> >> >> > error_setg(errp,"'vmx' feature can't be disabled when hv-evmcs is enabled," >> >> >> > " either enable 'vmx' or disable 'hv-evmcs' along with disabling 'vmx'" >> >> >> > >> >> >> > making host's features set, *magically* mutable, depending on other user provided features >> >> >> > is a bit confusing. One would never know what hv-passthrough actually means, and if >> >> >> > enabling/disabling 'random' feature changes it. >> >> >> > >> >> >> > It's cleaner to do just what user asked (whether implicitly or explicitly) and error out >> >> >> > in case it ends up in nonsense configuration. >> >> >> > >> >> >> >> >> >> I don't seem to agree this is a sane behavior, especially if you replace >> >> >> 'hv-passthrough' with 'hv-default' above. Removing 'vmx' from CPU for >> >> >> Windows guests is common if you'd want to avoid nested configuration: >> >> >> even without any Hyper-V guests created, Windows itself is a Hyper-V >> >> >> partition. >> >> >> >> >> >> So a sane user will do: >> >> >> >> >> >> '-cpu host,hv-default,vmx=off' >> >> >> >> >> >> and on Intel he will get an error, and on AMD he won't. >> >> >> >> >> >> So what you're suggesting actually defeats the whole purpose of >> >> >> 'hv-default' as upper-layer tools (think libvirt) will need to know that >> >> > I'd assume it would be hard for libvirt to use 'hv-default' from migration >> >> > point of view. It's semi opaque (one can find out what features it sets >> >> > indirectly inspecting individual hv_foo features, and mgmt will need to >> >> > know about them). If it will mutate when other features [un]set, upper >> >> > layers might need to enumerate all these permutations to know which hosts >> >> > are compatible or compare host feature sets every time before attempting >> >> > migration. >> >> >> >> That's exactly the opposite of what's the goal here which is: make it >> >> possible for upper layers to not know anything about Hyper-V >> >> enlightenments besides 'hv-default'. Migration should work just fine, if >> >> the rest of guest configuration matches -- then 'hv-default' will create >> >> the exact same things (e.g. if 'vmx' was disabled on the source it has >> > ^^^^^ >> > I'm not convinced in that yet (not with current impl. more on that at the end of reply) >> > >> >> to be enabled on the destination, it can't be different) >> >> >> >> >> >> >> Intel configurations for Windows guests are somewhat different. They'll >> >> >> need to know what 'hv-evmcs' is. We're back to where we've started. >> >> > >> >> > we were talking about hv-passthrough, and if host advertises hv-evmcs >> >> > QEMU should complain if user disabled features it depends on ( >> >> > not silently fixing up configuration error). >> >> > But the same applies to hv-default. >> >> >> >> Let's forget about hv-passthrough completely for a while as this series >> >> is kind of unrelated to it. >> > >> > It adds a lot for unrelated code (not just couple of lines), >> > I've played with scratch CPU idea, here is demo of it >> > https://github.com/imammedo/qemu/commit/a4b107d5368ebf72d45082bc8310a6b88a4ba6fb >> > I didn't rework caps/cpuid querying parts (just hacked around it), >> > and even without that it saves us ~200LOC (not a small part of which comes >> > with this series). >> >> All your savings come from throwing away custom parsers -- which are not >> needed at all if we don't distinguish between >> >> 'hv-default,hv-evmcs' and 'hv-default' >> >> it's just not needed, don't count these patches in. Or, if it is needed, >> please explain how your scratch CPU is making things different. I guess >> it is not so we can discuss this outside of this series. > > scratch CPU helps with hostpassthrough refactoring which also brings in > dependency on custom parser. I think I already said that but let me repeat myself: scratch CPUs don't help us in any way to distinguish between 'hv-passthrough,hv-evmcs,vmx=off' and 'hv-passthrough,vmx=off' and thus are utterly useless (for this particular feature). It's not 'refactoring', it's throwing a feature away for the sake of code purity. > > >> > I also split horrible hv_cpuid_check_and_set into separate 'set' and 'check' stages. >> > Granted it was sort-of pre-existing ugly code, some of your >> > re-factoring made it a bit better but it's still far from readable. >> > >> >> hv_cpuid_check_and_set() is already there, I'm not at all opposed to >> making this code even better but I don't see it as a must for this >> particular feature (hv-default). > it's not a must have for hv-default especially if you don't touch it. > (however if you touch it & co, I'd ask to clean it up first) > > >> >> In the previous submission I was setting 'hv-default' based on host >> >> availability of the feature only. That is: set on Intel, unset on >> >> AMD. We have to at least preserve that because it would be insane to >> >> crash on >> >> >> >> -cpu host,hv-default >> >> >> >> on AMD because AMD doesn't (and never will!) support hv-evmcs, right? >> > >> > If QEMU prevents cross arch migration i.e. it's not supported, >> > then I guess we can make hv-default different depending on AMD or Intel host. >> > If not then we might need to be conservative i.e. exclude hv-evmcs from defaults. >> > >> >> Forget about cross vendor. I want to tie it 1:1 to VMX feature on the >> guest CPU -- which happens to be only available on Intel. It is >> absolutely impossible to migrate VMX enabled guest to VMX-disabled >> destination, with or without evmcs. > Ok > >> >> >> If we are to follow this approach let's just throw away 'hv-evmcs' from >> >> >> 'hv-default' set, it's going to be much cleaner. But again, I don't >> >> >> really believe it's the right way to go. >> >> > >> >> > if desired behavior, on Intel host for above config, to start without error >> >> > then indeed defaults should not set 'hv-evmcs' if it results in invalid >> >> > feature set. >> >> >> >> This is problematic as it is still sane for everyone to enable it as it >> >> gives performance advantage. If we just for a second forget about custom >> > " >> > > >> So a sane user will do: >> > > >> >> > > >> '-cpu host,hv-default,vmx=off' >> > " >> > it's not easy picking defaults. >> > >> >> parsers and all that -- which is just an implementation detail, why can't >> >> we tie 'hv-evmcs' bit in 'hv-default' to 'vxm' 1:1? >> > migration wise I don't see issues wrt vmx=off turning of hv-evmcs, >> > however ... >> > >> > we were replacing user input fixups with hard errors asking >> > user to fix CLI and removing custom parsers in favor of generic ones. >> > >> > In vmx=off case we would be fixing up what 'hv-default' explicitly set. >> > Same applies to other hv-foo set by hv-default. >> > >> > ex: 'hv-default,hv-dep1=off', will turn off some dependent feature >> > for other hv feature in hv-default set and it will error out, >> > same goes on for enabling feature that has dependencies. >> > Why should we treat hv-evmcs/vmx pair any different? >> > >> >> VMX is not part of Hyper-V enlightenments, is it? It can also be coming >> from a CPU model: >> >> "-cpu MyLovelyModelWithVmx,hv-default" >> >> should not throw an error! >> >> Again, the goal is for userspace to not know anything besides >> 'hv-default' for Hyper-V enlightenments. >> >> >> > Granted exiting with error is not the best UX, but at least it says to user >> > what's wrong with CLI and how to fix it. Also it lets to keep QEMU code >> > manageable and with consistent behavior. >> >> Enabling EVMCS only when guest CPU has VMX is a smart behavior, all >> users want that. It is very consistent with how genuine Hyper-V behaves. > it's all good, > unless VMX is absent for whatever reasons (cpumodel or vmx=off on CLI), > in this case just error out and say what's wrong instead of trying to fix > CLI up. It's easier, of course, but I don't think we should do that. At least query-cpu-model-expansion type=full model={"name":"host","props":{"hv-passthrough":true}} should give upper layer the full list of supported features, including evmcs, and not just throw an error. Same with 'hv-default' > >> >> Again, the end goal is: make it possible for upper layers to now know >> >> anything about Hyper-V enlightenments other than 'hv-default'. >> > I'm still doubtful about feasibility of this goal when migration is considered. >> > It sure would work if hosts are identical hw/sw wise. >> > In mixed setup all features, except of hv-evmcs, that included in 'hv-default', >> > will error out in case host doesn't support it, which should prevent >> > incompatible migration so that's also fine. >> > >> > But hv-evmcs will silently go away if host doesn't support it, >> > which is issue when migration happens to/from host that supports it. >> >> But how can it go away? In KVM, hv-evmcs support is not conditional, >> basically, all KVMs which support netsting state migration also support >> EVMCS. > > you added that in kernel between 4.19-4.20 (8cab6507f64), > so on Intel host flag can change depending on not so old kernel version. > > Unless QEMU minimum supported kernel version is 4.20, > we can't ignore that. > (downstreams will have to take care of it on its own) You forget that EVMCS is not a feature in vacuum, it is an extension to VMX. To migrate a guest which has it enabled, nested state migration has to support it. Assuming the feature was enabled on the source, you need at least the following: commit 8cab6507f64eff0ccfea01fccbc7e3e05e2aaf7e Author: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Tue Oct 16 18:50:09 2018 +0200 x86/kvm/nVMX: nested state migration for Enlightened VMCS which is also in 4.20, I don't see how migration without it can even succeed. > >> > >> > Maybe to help mgmt to figure out hosts compatibility >> > 1. it should know about hv-evmcs to query it's status >> > 2. or default value set by 'hv-default' should be exposed to mgmt >> > so it could compare whole feature-set in one go without being >> > aware of individual features. >> > >> > Additionally on QEMU side for such conditional features we can >> > theoretically add a subsection to migration stream when feature >> > is enabled, that way we at least can prevent 'successful' migration, >> > when destination value doesn't match. But this might already be >> > over-engineering on my part. >> >> I think you're trying to solve an issue which doesn't exist. In case >> we're successfully migrating a nested enabled guest, our KVM is modern >> enough and supports EVMCS (on Intel, of course). Also, nested state >> (which is part of the migration stream) has EVMCS flag, we can't migrate >> somewhere where the flag is unsupported. > that's what I was missing. > Can you point out to the code that makes sure that migration fails? > (a comment where hv-evcms is added to default set explaining why it's safe, pls) > If you look at KVM_SET_NESTED_STATE implementation in KVM (see kvm_arch_vcpu_ioctl()), it checks for supported flags arch/x86/kvm/x86.c- if (kvm_state.flags & arch/x86/kvm/x86.c- ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE arch/x86/kvm/x86.c- | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING arch/x86/kvm/x86.c- | KVM_STATE_NESTED_GIF_SET)) arch/x86/kvm/x86.c- break; if KVM_STATE_NESTED_EVMCS is unsupported (KVM before 8cab6507f64) this condition will fail. In reality, regardless of evmcs, there are so many bugs in nested migration prior to 5.11/5.10(maybe) that chances for successful migration on these kernels are pretty slim. Worst is, the migration won't fail, you guest (with its nested guests) will just crash. I don't recommend anyone doing nested migration in production with some older kernel, it's just not safe. > >> Anyway, I feel we're walking in circles. I'm ready to just drop all >> EVMCS related bits from this seies to get it merged. This will make the >> part which you hate the most ("custom parsers") go away too. We can discuss >> EVMCS to death after that and, when we finally decide that user >> convenience is actually worth something, we can add 'hv-evmcs' to the >> new > > fine with me, it would be even easier to review if it were just a patch that > would add 'hv-defaults', without non must have refactoring. > (cleanup/refactoring could be another series) > Ok, v5 is on the list. > If we can guarantee that hv-evcms won't flip on/off on all supported kernels, > I'm also fine with keeping it in default set and error-ing out if vmx ends up > in off state. I still don't see why and how it should (or could) flip. > However if it changes, we need to expose 'default set' to mgmt somehow, > so it will know that hosts aren't compatible (instead of finding out it hard way > in form of failed migration (assuming it fails)) The first part of this series does exactly that: exposes Hyper-V enlightenments to upper layers in QMP. Upper layer can do e.g. query-cpu-model-expansion type=full model={"name":"host","props":{"hv-passthrough":true}} and it will get all the supported features, including evmcs. This is one of the reasons I insist this should *always* work instead of throwing an error (with 'hv-default too).
diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c4e8863c7ca0..e8a004c39d04 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -4553,6 +4553,178 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, const char *name, cpu->env.tsc_khz = cpu->env.user_tsc_khz = value / 1000; } +static bool x86_hv_feature_get(Object *obj, int feature) +{ + X86CPU *cpu = X86_CPU(obj); + + return cpu->hyperv_features & BIT(feature); +} + +static void x86_hv_feature_set(Object *obj, bool value, int feature) +{ + X86CPU *cpu = X86_CPU(obj); + + if (value) { + cpu->hyperv_features |= BIT(feature); + cpu->hyperv_features_on |= BIT(feature); + cpu->hyperv_features_off &= ~BIT(feature); + } else { + cpu->hyperv_features &= ~BIT(feature); + cpu->hyperv_features_on &= ~BIT(feature); + cpu->hyperv_features_off |= BIT(feature); + } +} + +static bool x86_hv_relaxed_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_RELAXED); +} + +static void x86_hv_relaxed_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_RELAXED); +} + +static bool x86_hv_vapic_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_VAPIC); +} + +static void x86_hv_vapic_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_VAPIC); +} + +static bool x86_hv_time_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_TIME); +} + +static void x86_hv_time_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_TIME); +} + +static bool x86_hv_crash_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_CRASH); +} + +static void x86_hv_crash_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_CRASH); +} + +static bool x86_hv_reset_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_RESET); +} + +static void x86_hv_reset_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_RESET); +} + +static bool x86_hv_vpindex_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_VPINDEX); +} + +static void x86_hv_vpindex_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_VPINDEX); +} + +static bool x86_hv_runtime_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_RUNTIME); +} + +static void x86_hv_runtime_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_RUNTIME); +} + +static bool x86_hv_synic_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_SYNIC); +} + +static void x86_hv_synic_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_SYNIC); +} + +static bool x86_hv_stimer_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_STIMER); +} + +static void x86_hv_stimer_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_STIMER); +} + +static bool x86_hv_frequencies_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_FREQUENCIES); +} + +static void x86_hv_frequencies_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_FREQUENCIES); +} + +static bool x86_hv_reenlightenment_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_REENLIGHTENMENT); +} + +static void x86_hv_reenlightenment_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_REENLIGHTENMENT); +} + +static bool x86_hv_tlbflush_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_TLBFLUSH); +} + +static void x86_hv_tlbflush_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_TLBFLUSH); +} + +static bool x86_hv_evmcs_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_EVMCS); +} + +static void x86_hv_evmcs_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_EVMCS); +} + +static bool x86_hv_ipi_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_IPI); +} + +static void x86_hv_ipi_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_IPI); +} + +static bool x86_hv_stimer_direct_get(Object *obj, Error **errp) +{ + return x86_hv_feature_get(obj, HYPERV_FEAT_STIMER_DIRECT); +} + +static void x86_hv_stimer_direct_set(Object *obj, bool value, Error **errp) +{ + x86_hv_feature_set(obj, value, HYPERV_FEAT_STIMER_DIRECT); +} + /* Generic getter for "feature-words" and "filtered-features" properties */ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, const char *name, void *opaque, @@ -7107,36 +7279,6 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_UINT32("hv-spinlocks", X86CPU, hyperv_spinlock_attempts, HYPERV_SPINLOCK_NEVER_NOTIFY), - DEFINE_PROP_BIT64("hv-relaxed", X86CPU, hyperv_features, - HYPERV_FEAT_RELAXED, 0), - DEFINE_PROP_BIT64("hv-vapic", X86CPU, hyperv_features, - HYPERV_FEAT_VAPIC, 0), - DEFINE_PROP_BIT64("hv-time", X86CPU, hyperv_features, - HYPERV_FEAT_TIME, 0), - DEFINE_PROP_BIT64("hv-crash", X86CPU, hyperv_features, - HYPERV_FEAT_CRASH, 0), - DEFINE_PROP_BIT64("hv-reset", X86CPU, hyperv_features, - HYPERV_FEAT_RESET, 0), - DEFINE_PROP_BIT64("hv-vpindex", X86CPU, hyperv_features, - HYPERV_FEAT_VPINDEX, 0), - DEFINE_PROP_BIT64("hv-runtime", X86CPU, hyperv_features, - HYPERV_FEAT_RUNTIME, 0), - DEFINE_PROP_BIT64("hv-synic", X86CPU, hyperv_features, - HYPERV_FEAT_SYNIC, 0), - DEFINE_PROP_BIT64("hv-stimer", X86CPU, hyperv_features, - HYPERV_FEAT_STIMER, 0), - DEFINE_PROP_BIT64("hv-frequencies", X86CPU, hyperv_features, - HYPERV_FEAT_FREQUENCIES, 0), - DEFINE_PROP_BIT64("hv-reenlightenment", X86CPU, hyperv_features, - HYPERV_FEAT_REENLIGHTENMENT, 0), - DEFINE_PROP_BIT64("hv-tlbflush", X86CPU, hyperv_features, - HYPERV_FEAT_TLBFLUSH, 0), - DEFINE_PROP_BIT64("hv-evmcs", X86CPU, hyperv_features, - HYPERV_FEAT_EVMCS, 0), - DEFINE_PROP_BIT64("hv-ipi", X86CPU, hyperv_features, - HYPERV_FEAT_IPI, 0), - DEFINE_PROP_BIT64("hv-stimer-direct", X86CPU, hyperv_features, - HYPERV_FEAT_STIMER_DIRECT, 0), DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU, hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF), DEFINE_PROP_BOOL("hv-passthrough", X86CPU, hyperv_passthrough, false), @@ -7283,6 +7425,41 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) x86_cpu_get_crash_info_qom, NULL, NULL, NULL); #endif + object_class_property_add_bool(oc, "hv-relaxed", + x86_hv_relaxed_get, x86_hv_relaxed_set); + object_class_property_add_bool(oc, "hv-vapic", + x86_hv_vapic_get, x86_hv_vapic_set); + object_class_property_add_bool(oc, "hv-time", + x86_hv_time_get, x86_hv_time_set); + object_class_property_add_bool(oc, "hv-crash", + x86_hv_crash_get, x86_hv_crash_set); + object_class_property_add_bool(oc, "hv-reset", + x86_hv_reset_get, x86_hv_reset_set); + object_class_property_add_bool(oc, "hv-vpindex", + x86_hv_vpindex_get, x86_hv_vpindex_set); + object_class_property_add_bool(oc, "hv-runtime", + x86_hv_runtime_get, x86_hv_runtime_set); + object_class_property_add_bool(oc, "hv-synic", + x86_hv_synic_get, x86_hv_synic_set); + object_class_property_add_bool(oc, "hv-stimer", + x86_hv_stimer_get, x86_hv_stimer_set); + object_class_property_add_bool(oc, "hv-frequencies", + x86_hv_frequencies_get, + x86_hv_frequencies_set); + object_class_property_add_bool(oc, "hv-reenlightenment", + x86_hv_reenlightenment_get, + x86_hv_reenlightenment_set); + object_class_property_add_bool(oc, "hv-tlbflush", + x86_hv_tlbflush_get, x86_hv_tlbflush_set); + object_class_property_add_bool(oc, "hv-evmcs", + x86_hv_evmcs_get, + x86_hv_evmcs_set); + object_class_property_add_bool(oc, "hv-ipi", + x86_hv_ipi_get, x86_hv_ipi_set); + object_class_property_add_bool(oc, "hv-stimer-direct", + x86_hv_stimer_direct_get, + x86_hv_stimer_direct_set); + for (w = 0; w < FEATURE_WORDS; w++) { int bitnr; for (bitnr = 0; bitnr < 64; bitnr++) { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 7ea14822aab5..b4fbd46f0fc9 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1667,6 +1667,8 @@ struct X86CPU { char *hyperv_vendor; bool hyperv_synic_kvm_only; uint64_t hyperv_features; + uint64_t hyperv_features_on; + uint64_t hyperv_features_off; bool hyperv_passthrough; OnOffAuto hyperv_no_nonarch_cs; uint32_t hyperv_vendor_id[3];
Sometimes we'd like to know which features were explicitly enabled and which were explicitly disabled on the command line. E.g. it seems logical to handle 'hv_passthrough,hv_feature=off' as "enable everything supported by the host except for hv_feature" but this doesn't seem to be possible with the current 'hyperv_features' bit array. Introduce 'hv_features_on'/'hv_features_off' add-ons and track explicit enablement/disablement there. Note, it doesn't seem to be possible to fill 'hyperv_features' array during CPU creation time when 'hv-passthrough' is specified and we're running on an older kernel without KVM_CAP_SYS_HYPERV_CPUID support. To get the list of the supported Hyper-V features we need to actually create KVM VCPU and this happens much later. No functional change intended. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> --- target/i386/cpu.c | 237 ++++++++++++++++++++++++++++++++++++++++------ target/i386/cpu.h | 2 + 2 files changed, 209 insertions(+), 30 deletions(-)