Message ID | 1482866480-26208-5-git-send-email-ehabkost@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: > Instead of blocking migration on the source when invtsc is > enabled, rely on the migration destination to ensure there's no > TSC frequency mismatch. > > We can't allow migration unconditionally because we don't know if > the destination is a QEMU version that is really going to ensure > there's no TSC frequency mismatch. To ensure we are migrating to > a destination that won't ignore SET_TSC_KHZ errors, allow invtsc > migration only on pc-*-2.9 and newer. > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > --- > include/hw/i386/pc.h | 7 ++++++- > target/i386/cpu.h | 1 + > target/i386/cpu.c | 1 + > target/i386/kvm.c | 15 +++++++++------ > 4 files changed, 17 insertions(+), 7 deletions(-) > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index ceeacca..4270923 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -375,7 +375,12 @@ int e820_get_num_entries(void); > bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); > > #define PC_COMPAT_2_8 \ > - HW_COMPAT_2_8 > + HW_COMPAT_2_8 \ > + {\ > + .driver = TYPE_X86_CPU,\ > + .property = "invtsc-migration",\ > + .value = "off",\ > + }, > > #define PC_COMPAT_2_7 \ > HW_COMPAT_2_7 \ > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index a7f2f60..ec8cdbc 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -1208,6 +1208,7 @@ struct X86CPU { > bool expose_kvm; > bool migratable; > bool host_features; > + bool invtsc_migration; > uint32_t apic_id; > > /* if true the CPUID code directly forward host cache leaves to the guest */ > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index b0640f1..cc93b81 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -3678,6 +3678,7 @@ static Property x86_cpu_properties[] = { > DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), > DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), > DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), > + DEFINE_PROP_BOOL("invtsc-migration", X86CPU, invtsc_migration, true), > DEFINE_PROP_END_OF_LIST() > }; > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c > index 6a51399..2c3ee7b 100644 > --- a/target/i386/kvm.c > +++ b/target/i386/kvm.c > @@ -962,7 +962,7 @@ int kvm_arch_init_vcpu(CPUState *cs) > has_msr_mcg_ext_ctl = has_msr_feature_control = true; > } > > - if (!env->user_tsc_khz) { > + if (!cpu->invtsc_migration && !env->user_tsc_khz) { > if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > invtsc_mig_blocker == NULL) { > /* for migration */ > @@ -972,6 +972,7 @@ int kvm_arch_init_vcpu(CPUState *cs) > migrate_add_blocker(invtsc_mig_blocker); > /* for savevm */ > vmstate_x86_cpu.unmigratable = 1; > + } > } > > cpuid_data.cpuid.padding = 0; > @@ -2655,12 +2656,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > } > > if (level == KVM_PUT_FULL_STATE) { > - /* We don't check for kvm_arch_set_tsc_khz() errors here, > - * because TSC frequency mismatch shouldn't abort migration, > - * unless the user explicitly asked for a more strict TSC > - * setting (e.g. using an explicit "tsc-freq" option). > + /* Migration TSC frequency mismatch is fatal only if we are > + * actually reporting Invariant TSC to the guest. > */ > - kvm_arch_set_tsc_khz(cpu); > + ret = kvm_arch_set_tsc_khz(cpu); > + if ((x86_cpu->env.features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > + ret < 0) { > + return ret; > + } > } Will the guest continue in the source in this case? I think this is past the point where migration has been declared successful. Otherwise looks good.
On Wed, Jan 04, 2017 at 09:56:56AM -0200, Marcelo Tosatti wrote: > On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: > > Instead of blocking migration on the source when invtsc is > > enabled, rely on the migration destination to ensure there's no > > TSC frequency mismatch. > > > > We can't allow migration unconditionally because we don't know if > > the destination is a QEMU version that is really going to ensure > > there's no TSC frequency mismatch. To ensure we are migrating to > > a destination that won't ignore SET_TSC_KHZ errors, allow invtsc > > migration only on pc-*-2.9 and newer. > > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > > --- > > include/hw/i386/pc.h | 7 ++++++- > > target/i386/cpu.h | 1 + > > target/i386/cpu.c | 1 + > > target/i386/kvm.c | 15 +++++++++------ > > 4 files changed, 17 insertions(+), 7 deletions(-) > > > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > > index ceeacca..4270923 100644 > > --- a/include/hw/i386/pc.h > > +++ b/include/hw/i386/pc.h > > @@ -375,7 +375,12 @@ int e820_get_num_entries(void); > > bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); > > > > #define PC_COMPAT_2_8 \ > > - HW_COMPAT_2_8 > > + HW_COMPAT_2_8 \ > > + {\ > > + .driver = TYPE_X86_CPU,\ > > + .property = "invtsc-migration",\ > > + .value = "off",\ > > + }, > > > > #define PC_COMPAT_2_7 \ > > HW_COMPAT_2_7 \ > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > > index a7f2f60..ec8cdbc 100644 > > --- a/target/i386/cpu.h > > +++ b/target/i386/cpu.h > > @@ -1208,6 +1208,7 @@ struct X86CPU { > > bool expose_kvm; > > bool migratable; > > bool host_features; > > + bool invtsc_migration; > > uint32_t apic_id; > > > > /* if true the CPUID code directly forward host cache leaves to the guest */ > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > > index b0640f1..cc93b81 100644 > > --- a/target/i386/cpu.c > > +++ b/target/i386/cpu.c > > @@ -3678,6 +3678,7 @@ static Property x86_cpu_properties[] = { > > DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), > > DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), > > DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), > > + DEFINE_PROP_BOOL("invtsc-migration", X86CPU, invtsc_migration, true), > > DEFINE_PROP_END_OF_LIST() > > }; > > > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c > > index 6a51399..2c3ee7b 100644 > > --- a/target/i386/kvm.c > > +++ b/target/i386/kvm.c > > @@ -962,7 +962,7 @@ int kvm_arch_init_vcpu(CPUState *cs) > > has_msr_mcg_ext_ctl = has_msr_feature_control = true; > > } > > > > - if (!env->user_tsc_khz) { > > + if (!cpu->invtsc_migration && !env->user_tsc_khz) { > > if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > > invtsc_mig_blocker == NULL) { > > /* for migration */ > > @@ -972,6 +972,7 @@ int kvm_arch_init_vcpu(CPUState *cs) > > migrate_add_blocker(invtsc_mig_blocker); > > /* for savevm */ > > vmstate_x86_cpu.unmigratable = 1; > > + } > > } > > > > cpuid_data.cpuid.padding = 0; > > @@ -2655,12 +2656,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > > } > > > > if (level == KVM_PUT_FULL_STATE) { > > - /* We don't check for kvm_arch_set_tsc_khz() errors here, > > - * because TSC frequency mismatch shouldn't abort migration, > > - * unless the user explicitly asked for a more strict TSC > > - * setting (e.g. using an explicit "tsc-freq" option). > > + /* Migration TSC frequency mismatch is fatal only if we are > > + * actually reporting Invariant TSC to the guest. > > */ > > - kvm_arch_set_tsc_khz(cpu); > > + ret = kvm_arch_set_tsc_khz(cpu); > > + if ((x86_cpu->env.features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > > + ret < 0) { > > + return ret; > > + } > > } > > Will the guest continue in the source in this case? > > I think this is past the point where migration has been declared > successful. > > Otherwise looks good. Good point. I will make additional tests and see if there's some other place where the kvm_arch_set_tsc_khz() call can be moved to.
On Wed, Jan 04, 2017 at 11:39:16AM -0200, Eduardo Habkost wrote: > On Wed, Jan 04, 2017 at 09:56:56AM -0200, Marcelo Tosatti wrote: > > On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: > > > Instead of blocking migration on the source when invtsc is > > > enabled, rely on the migration destination to ensure there's no > > > TSC frequency mismatch. > > > > > > We can't allow migration unconditionally because we don't know if > > > the destination is a QEMU version that is really going to ensure > > > there's no TSC frequency mismatch. To ensure we are migrating to > > > a destination that won't ignore SET_TSC_KHZ errors, allow invtsc > > > migration only on pc-*-2.9 and newer. > > > > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> [...] > > > @@ -2655,12 +2656,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > > > } > > > > > > if (level == KVM_PUT_FULL_STATE) { > > > - /* We don't check for kvm_arch_set_tsc_khz() errors here, > > > - * because TSC frequency mismatch shouldn't abort migration, > > > - * unless the user explicitly asked for a more strict TSC > > > - * setting (e.g. using an explicit "tsc-freq" option). > > > + /* Migration TSC frequency mismatch is fatal only if we are > > > + * actually reporting Invariant TSC to the guest. > > > */ > > > - kvm_arch_set_tsc_khz(cpu); > > > + ret = kvm_arch_set_tsc_khz(cpu); > > > + if ((x86_cpu->env.features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > > > + ret < 0) { > > > + return ret; > > > + } > > > } > > > > Will the guest continue in the source in this case? > > > > I think this is past the point where migration has been declared > > successful. > > > > Otherwise looks good. > > Good point. I will make additional tests and see if there's some > other place where the kvm_arch_set_tsc_khz() call can be moved > to. So, if we solve this and do something on (for example) post_load, we still have a problem: device state is migrated after RAM. This means QEMU will check for TSC scaling and abort migration very late. We could solve that by manually registering a SaveVMHandler that will send the TSC frequency on save_live_setup, so migration is aborted earlier. But: this sounds like just a complex hack to work around the real problems: 1) TSC frequency is guest-visible, and anything that affects guest ABI should depend on the VM configuration, not on host capabilities; 2) Setting TSC frequency depending on the host will make migratability unpredictable for management software: the same VM config could be migratable to host A when started on host B, but not migratable to host A when started on host C. I suggest we allow migration with invtsc if and only if tsc-frequency is set explicitly by management software. In other words, apply only patches 1/4 and 2/4 from this series. After that, we will need libvirt support for configuring tsc-frequency.
On Wed, Jan 04, 2017 at 05:59:17PM -0200, Eduardo Habkost wrote: > On Wed, Jan 04, 2017 at 11:39:16AM -0200, Eduardo Habkost wrote: > > On Wed, Jan 04, 2017 at 09:56:56AM -0200, Marcelo Tosatti wrote: > > > On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: > > > > Instead of blocking migration on the source when invtsc is > > > > enabled, rely on the migration destination to ensure there's no > > > > TSC frequency mismatch. > > > > > > > > We can't allow migration unconditionally because we don't know if > > > > the destination is a QEMU version that is really going to ensure > > > > there's no TSC frequency mismatch. To ensure we are migrating to > > > > a destination that won't ignore SET_TSC_KHZ errors, allow invtsc > > > > migration only on pc-*-2.9 and newer. > > > > > > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > [...] > > > > @@ -2655,12 +2656,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > > > > } > > > > > > > > if (level == KVM_PUT_FULL_STATE) { > > > > - /* We don't check for kvm_arch_set_tsc_khz() errors here, > > > > - * because TSC frequency mismatch shouldn't abort migration, > > > > - * unless the user explicitly asked for a more strict TSC > > > > - * setting (e.g. using an explicit "tsc-freq" option). > > > > + /* Migration TSC frequency mismatch is fatal only if we are > > > > + * actually reporting Invariant TSC to the guest. > > > > */ > > > > - kvm_arch_set_tsc_khz(cpu); > > > > + ret = kvm_arch_set_tsc_khz(cpu); > > > > + if ((x86_cpu->env.features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > > > > + ret < 0) { > > > > + return ret; > > > > + } > > > > } > > > > > > Will the guest continue in the source in this case? > > > > > > I think this is past the point where migration has been declared > > > successful. > > > > > > Otherwise looks good. > > > > Good point. I will make additional tests and see if there's some > > other place where the kvm_arch_set_tsc_khz() call can be moved > > to. > > So, if we solve this and do something on (for example) post_load, > we still have a problem: device state is migrated after RAM. This > means QEMU will check for TSC scaling and abort migration very > late. > > We could solve that by manually registering a SaveVMHandler that > will send the TSC frequency on save_live_setup, so migration is > aborted earlier. > > But: this sounds like just a complex hack to work around the real > problems: > > 1) TSC frequency is guest-visible, and anything that affects > guest ABI should depend on the VM configuration, not on host > capabilities; Well not really: the TSC frequency where the guest starts is the frequency the guest software expects. So it does depend on host capabilities. > 2) Setting TSC frequency depending on the host will make > migratability unpredictable for management software: the same > VM config could be migratable to host A when started on host > B, but not migratable to host A when started on host C. Well, just check the frequency. > I suggest we allow migration with invtsc if and only if > tsc-frequency is set explicitly by management software. In other > words, apply only patches 1/4 and 2/4 from this series. After > that, we will need libvirt support for configuring tsc-frequency. I don't like that for the following reasons: * It moves low level complexity from QEMU/KVM to libvirt (libvirt has to call KVM_GET_TSC_KHZ from a vcpu thread). * It makes it difficult to run QEMU manually (i use QEMU manually all the time). * It requires patching libvirt. In my experience things work better when the functionality is not split across libvirt/qemu. Can't this be fixed in QEMU? Just check that destination host supports TSC scaling before migration (or that KVM_GET_TSC_KHZ return value matches on source and destination).
On Wed, Jan 04, 2017 at 08:26:27PM -0200, Marcelo Tosatti wrote: > On Wed, Jan 04, 2017 at 05:59:17PM -0200, Eduardo Habkost wrote: > > On Wed, Jan 04, 2017 at 11:39:16AM -0200, Eduardo Habkost wrote: > > > On Wed, Jan 04, 2017 at 09:56:56AM -0200, Marcelo Tosatti wrote: > > > > On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: > > > > > Instead of blocking migration on the source when invtsc is > > > > > enabled, rely on the migration destination to ensure there's no > > > > > TSC frequency mismatch. > > > > > > > > > > We can't allow migration unconditionally because we don't know if > > > > > the destination is a QEMU version that is really going to ensure > > > > > there's no TSC frequency mismatch. To ensure we are migrating to > > > > > a destination that won't ignore SET_TSC_KHZ errors, allow invtsc > > > > > migration only on pc-*-2.9 and newer. > > > > > > > > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > > [...] > > > > > @@ -2655,12 +2656,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > > > > > } > > > > > > > > > > if (level == KVM_PUT_FULL_STATE) { > > > > > - /* We don't check for kvm_arch_set_tsc_khz() errors here, > > > > > - * because TSC frequency mismatch shouldn't abort migration, > > > > > - * unless the user explicitly asked for a more strict TSC > > > > > - * setting (e.g. using an explicit "tsc-freq" option). > > > > > + /* Migration TSC frequency mismatch is fatal only if we are > > > > > + * actually reporting Invariant TSC to the guest. > > > > > */ > > > > > - kvm_arch_set_tsc_khz(cpu); > > > > > + ret = kvm_arch_set_tsc_khz(cpu); > > > > > + if ((x86_cpu->env.features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > > > > > + ret < 0) { > > > > > + return ret; > > > > > + } > > > > > } > > > > > > > > Will the guest continue in the source in this case? > > > > > > > > I think this is past the point where migration has been declared > > > > successful. > > > > > > > > Otherwise looks good. > > > > > > Good point. I will make additional tests and see if there's some > > > other place where the kvm_arch_set_tsc_khz() call can be moved > > > to. > > > > So, if we solve this and do something on (for example) post_load, > > we still have a problem: device state is migrated after RAM. This > > means QEMU will check for TSC scaling and abort migration very > > late. > > > > We could solve that by manually registering a SaveVMHandler that > > will send the TSC frequency on save_live_setup, so migration is > > aborted earlier. > > > > But: this sounds like just a complex hack to work around the real > > problems: > > > > 1) TSC frequency is guest-visible, and anything that affects > > guest ABI should depend on the VM configuration, not on host > > capabilities; > > Well not really: the TSC frequency where the guest starts > is the frequency the guest software expects. > So it does depend on host capabilities. Could you explain where this expectation comes from? I can see multiple cases where choosing the TSC frequency where the VM starts is not the best option. I am considering two possible scenarios below. You probably have another scenario in mind, so it would be useful if you could describe it so we can consider possible solutions. Scenario 1: You have two hosts: A and B, both with TSC scaling. They have different frequencies. The VM can be started on either one of them, and can be migrated to either one depending on the policy of management software. Maybe B is a backup host, the VM is expected to run most times on host A, and we want to use the TSC frequency from host A every time. Maybe it's the opposite and we want to use the frequency of B. Maybe we want to use the lowest frequency of both, maybe the highest one. We (QEMU developers) can recommend policy to libvirt developers, but a given QEMU process doesn't have information to decide what's best. Scenario 2: Host A has TSC scaling, host B doesn't have TSC scaling. We want to be able to start the VM on host A, and migrate to B. In this case, the only possible solution is to use B's frequency when starting the VM. The QEMU process doesn't have enough information to make that decision. > > > 2) Setting TSC frequency depending on the host will make > > migratability unpredictable for management software: the same > > VM config could be migratable to host A when started on host > > B, but not migratable to host A when started on host C. > > Well, just check the frequency. What do you mean by "just check the frequency"? What exactly should management software do? Whatever management software do, if you just use the source host frequency you can get into a situation where you can start a VM on host A and migrate it to B, but can't start the VM on host B and migrate it to A. This would be confusing for users, and probably break assumptions of existing management software. > > > I suggest we allow migration with invtsc if and only if > > tsc-frequency is set explicitly by management software. In other > > words, apply only patches 1/4 and 2/4 from this series. After > > that, we will need libvirt support for configuring tsc-frequency. > > I don't like that for the following reasons: > > * It moves low level complexity from QEMU/KVM to libvirt > (libvirt has to call KVM_GET_TSC_KHZ from a vcpu thread). It doesn't. It could ask QEMU for that information (the new query-cpu-model-expansion QMP command would allow that). Or, alternatively, it could just let the user choose the frequency. It's probably acceptable for many use cases where invtsc+migration is important. > * It makes it difficult to run QEMU manually (i use QEMU > manually all the time). I believe the set of people that: 1) need invtsc; 2) need migration to work; and 3) run QEMU manually will be able to add "tsc-frequency=XXX" to the command-line easily. :) > * It requires patching libvirt. > > In my experience things work better when the functionality is > not split across libvirt/qemu. In my experience things break when management software is the only component able to make a decision but we don't provide mechanisms to let management software make that decision. The TSC frequency affects migratability to hosts. Choose the wrong frequency, and you might not be able to migrate. Only management software knows to which hosts the VM could be migrated in the future, and which TSC frequency would allow that. > > Can't this be fixed in QEMU? Just check that destination host supports > TSC scaling before migration (or that KVM_GET_TSC_KHZ return value > matches on source and destination). > This solves only one use case: where you want to expose the starting host TSC frequency to the guest. It doesn't cover any scenario where the TSC frequency of the starting host isn't the best one. (See the two scenarios I described above) You seem to have a specific use case in mind. If you describe it, we could decide if it's worth the extra migration code complexity to deal with invtsc migration without explicit tsc-frequency. By now, I am not convinced yet. Maybe your use case just needs a explicit "invtsc-migration=on" command-line flag without a mechanism to abort migration on mismatch? I can't tell. Note that even if we follow your suggestion and implement an alternative version of patch 4/4 to cover your use case, I will strongly recommend libvirt developers to support configuring TSC frequency if they want to support invtsc + migration without surprising/unpredictable restrictions on migratability.
On Wed, Jan 04, 2017 at 11:36:31PM -0200, Eduardo Habkost wrote: > On Wed, Jan 04, 2017 at 08:26:27PM -0200, Marcelo Tosatti wrote: > > On Wed, Jan 04, 2017 at 05:59:17PM -0200, Eduardo Habkost wrote: > > > On Wed, Jan 04, 2017 at 11:39:16AM -0200, Eduardo Habkost wrote: > > > > On Wed, Jan 04, 2017 at 09:56:56AM -0200, Marcelo Tosatti wrote: > > > > > On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: > > > > > > Instead of blocking migration on the source when invtsc is > > > > > > enabled, rely on the migration destination to ensure there's no > > > > > > TSC frequency mismatch. > > > > > > > > > > > > We can't allow migration unconditionally because we don't know if > > > > > > the destination is a QEMU version that is really going to ensure > > > > > > there's no TSC frequency mismatch. To ensure we are migrating to > > > > > > a destination that won't ignore SET_TSC_KHZ errors, allow invtsc > > > > > > migration only on pc-*-2.9 and newer. > > > > > > > > > > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > > > [...] > > > > > > @@ -2655,12 +2656,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > > > > > > } > > > > > > > > > > > > if (level == KVM_PUT_FULL_STATE) { > > > > > > - /* We don't check for kvm_arch_set_tsc_khz() errors here, > > > > > > - * because TSC frequency mismatch shouldn't abort migration, > > > > > > - * unless the user explicitly asked for a more strict TSC > > > > > > - * setting (e.g. using an explicit "tsc-freq" option). > > > > > > + /* Migration TSC frequency mismatch is fatal only if we are > > > > > > + * actually reporting Invariant TSC to the guest. > > > > > > */ > > > > > > - kvm_arch_set_tsc_khz(cpu); > > > > > > + ret = kvm_arch_set_tsc_khz(cpu); > > > > > > + if ((x86_cpu->env.features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > > > > > > + ret < 0) { > > > > > > + return ret; > > > > > > + } > > > > > > } > > > > > > > > > > Will the guest continue in the source in this case? > > > > > > > > > > I think this is past the point where migration has been declared > > > > > successful. > > > > > > > > > > Otherwise looks good. > > > > > > > > Good point. I will make additional tests and see if there's some > > > > other place where the kvm_arch_set_tsc_khz() call can be moved > > > > to. > > > > > > So, if we solve this and do something on (for example) post_load, > > > we still have a problem: device state is migrated after RAM. This > > > means QEMU will check for TSC scaling and abort migration very > > > late. > > > > > > We could solve that by manually registering a SaveVMHandler that > > > will send the TSC frequency on save_live_setup, so migration is > > > aborted earlier. > > > > > > But: this sounds like just a complex hack to work around the real > > > problems: > > > > > > 1) TSC frequency is guest-visible, and anything that affects > > > guest ABI should depend on the VM configuration, not on host > > > capabilities; > > > > Well not really: the TSC frequency where the guest starts > > is the frequency the guest software expects. > > So it does depend on host capabilities. > > Could you explain where this expectation comes from? I can see > multiple cases where choosing the TSC frequency where the VM > starts is not the best option. 1. Boot guest. 2. Calibrate TSC. 3. Use delay() with TSC calibration above, or use TSC to measure the passage of time (TSC clock in userspace). > I am considering two possible scenarios below. You probably have > another scenario in mind, so it would be useful if you could > describe it so we can consider possible solutions. > > > Scenario 1: > > You have two hosts: A and B, both with TSC scaling. They have > different frequencies. The VM can be started on either one of > them, and can be migrated to either one depending on the policy > of management software. > > Maybe B is a backup host, the VM is expected to run most times on > host A, and we want to use the TSC frequency from host A every > time. Maybe it's the opposite and we want to use the frequency of > B. Maybe we want to use the lowest frequency of both, maybe the > highest one. We (QEMU developers) can recommend policy to libvirt > developers, but a given QEMU process doesn't have information to > decide what's best. I can't see any practical scenario here, you will always want to start with TSC frequency of the host where the VM was started. If i am mistaken, please describe a practical case. (If a practical scenario comes up, and there is a use-case for setting the TSC frequency on startup, lets say a Windows VM which fails to boot if the TSC frequency is too high, then it should be supported... But the only known scenario to me, applying to 99.999% of cases, is to expose the TSC frequency where the guest booted at). > Scenario 2: > > Host A has TSC scaling, host B doesn't have TSC scaling. We want > to be able to start the VM on host A, and migrate to B. In this > case, the only possible solution is to use B's frequency when > starting the VM. The QEMU process doesn't have enough information > to make that decision. That is a good point. But again, its a special case and should be supported by -cpu xxx,tsc-frequency=zzzz. However, for the vast majority of 99.999% cases, the issue can be handled entirely in QEMU, without libvirt involvement, and without adding extra steps to the management software. > > > 2) Setting TSC frequency depending on the host will make > > > migratability unpredictable for management software: the same > > > VM config could be migratable to host A when started on host > > > B, but not migratable to host A when started on host C. > > > > Well, just check the frequency. > > What do you mean by "just check the frequency"? What exactly > should management software do? VM booted on host-A can be migrated to host-B if TSC frequency matches. > Whatever management software do, if you just use the source host > frequency you can get into a situation where you can start a VM > on host A and migrate it to B, but can't start the VM on host B > and migrate it to A. This would be confusing for users, and > probably break assumptions of existing management software. Well this is a side effect of invariant TSC and migration. Look, i agree with all your points, but my point is this: i personally prefer to handle the 99.999% case, which is the TSC frequency exposed is the one from the host where the guest booted, in QEMU entirely (for example, to make life easier for people who run qemu manually such as myself). > > > I suggest we allow migration with invtsc if and only if > > > tsc-frequency is set explicitly by management software. In other > > > words, apply only patches 1/4 and 2/4 from this series. After > > > that, we will need libvirt support for configuring tsc-frequency. > > > > I don't like that for the following reasons: > > > > * It moves low level complexity from QEMU/KVM to libvirt > > (libvirt has to call KVM_GET_TSC_KHZ from a vcpu thread). > > It doesn't. It could ask QEMU for that information (the new > query-cpu-model-expansion QMP command would allow that). > > Or, alternatively, it could just let the user choose the > frequency. Again, you want to expose the host where the VM booted in most cases (except the ones you list above). > It's probably acceptable for many use cases where > invtsc+migration is important. Ok, thats better. Can you add a patch to your series with the steps as how mgmt software should proceed ? > > * It makes it difficult to run QEMU manually (i use QEMU > > manually all the time). > > I believe the set of people that: 1) need invtsc; 2) need > migration to work; and 3) run QEMU manually will be able to add > "tsc-frequency=XXX" to the command-line easily. :) Ok, so migration is only allowed if tsc-frequency= is specified. > > * It requires patching libvirt. > > > > In my experience things work better when the functionality is > > not split across libvirt/qemu. > > In my experience things break when management software is the > only component able to make a decision but we don't provide > mechanisms to let management software make that decision. > > The TSC frequency affects migratability to hosts. Choose the > wrong frequency, and you might not be able to migrate. Only > management software knows to which hosts the VM could be migrated > in the future, and which TSC frequency would allow that. True. > > Can't this be fixed in QEMU? Just check that destination host supports > > TSC scaling before migration (or that KVM_GET_TSC_KHZ return value > > matches on source and destination). > > > > This solves only one use case: where you want to expose the > starting host TSC frequency to the guest. It doesn't cover any > scenario where the TSC frequency of the starting host isn't the > best one. (See the two scenarios I described above) True. > You seem to have a specific use case in mind. If you describe it, > we could decide if it's worth the extra migration code complexity > to deal with invtsc migration without explicit tsc-frequency. By > now, I am not convinced yet. I don't have any specific use case in mind. I'm just trying to avoid moving complexity to libvirt which in my experience is a the best thing to do. > Maybe your use case just needs a explicit "invtsc-migration=on" > command-line flag without a mechanism to abort migration on > mismatch? I can't tell. Again, there is no special case. > Note that even if we follow your suggestion and implement an > alternative version of patch 4/4 to cover your use case, I will > strongly recommend libvirt developers to support configuring TSC > frequency if they want to support invtsc + migration without > surprising/unpredictable restrictions on migratability. Well, alright. If you make the TSC frequency of the host available to mgmt software as you describe, and write the steps mgmt software should take, i'm good. Thanks for the help Eduardo.
On Thu, Jan 05, 2017 at 08:48:32AM -0200, Marcelo Tosatti wrote: > > Note that even if we follow your suggestion and implement an > > alternative version of patch 4/4 to cover your use case, I will > > strongly recommend libvirt developers to support configuring TSC > > frequency if they want to support invtsc + migration without > > surprising/unpredictable restrictions on migratability. > > Well, alright. If you make the TSC frequency of the host > available to mgmt software as you describe, and write the steps mgmt > software should take, i'm good. > > Thanks for the help Eduardo. > I'll be happy to not have to add a weird script to 1. Query the host TSC frequency. 2. Pass that to -cpu xxx,tsc-frequency=ZZZ To my migration scripts. Probably easy to make that the default behaviour...
On Thu, Jan 05, 2017 at 08:48:32AM -0200, Marcelo Tosatti wrote: > On Wed, Jan 04, 2017 at 11:36:31PM -0200, Eduardo Habkost wrote: > > On Wed, Jan 04, 2017 at 08:26:27PM -0200, Marcelo Tosatti wrote: > > > On Wed, Jan 04, 2017 at 05:59:17PM -0200, Eduardo Habkost wrote: > > > > On Wed, Jan 04, 2017 at 11:39:16AM -0200, Eduardo Habkost wrote: > > > > > On Wed, Jan 04, 2017 at 09:56:56AM -0200, Marcelo Tosatti wrote: > > > > > > On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: > > > > > > > Instead of blocking migration on the source when invtsc is > > > > > > > enabled, rely on the migration destination to ensure there's no > > > > > > > TSC frequency mismatch. > > > > > > > > > > > > > > We can't allow migration unconditionally because we don't know if > > > > > > > the destination is a QEMU version that is really going to ensure > > > > > > > there's no TSC frequency mismatch. To ensure we are migrating to > > > > > > > a destination that won't ignore SET_TSC_KHZ errors, allow invtsc > > > > > > > migration only on pc-*-2.9 and newer. > > > > > > > > > > > > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > > > > [...] > > > > > > > @@ -2655,12 +2656,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > > > > > > > } > > > > > > > > > > > > > > if (level == KVM_PUT_FULL_STATE) { > > > > > > > - /* We don't check for kvm_arch_set_tsc_khz() errors here, > > > > > > > - * because TSC frequency mismatch shouldn't abort migration, > > > > > > > - * unless the user explicitly asked for a more strict TSC > > > > > > > - * setting (e.g. using an explicit "tsc-freq" option). > > > > > > > + /* Migration TSC frequency mismatch is fatal only if we are > > > > > > > + * actually reporting Invariant TSC to the guest. > > > > > > > */ > > > > > > > - kvm_arch_set_tsc_khz(cpu); > > > > > > > + ret = kvm_arch_set_tsc_khz(cpu); > > > > > > > + if ((x86_cpu->env.features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > > > > > > > + ret < 0) { > > > > > > > + return ret; > > > > > > > + } > > > > > > > } > > > > > > > > > > > > Will the guest continue in the source in this case? > > > > > > > > > > > > I think this is past the point where migration has been declared > > > > > > successful. > > > > > > > > > > > > Otherwise looks good. > > > > > > > > > > Good point. I will make additional tests and see if there's some > > > > > other place where the kvm_arch_set_tsc_khz() call can be moved > > > > > to. > > > > > > > > So, if we solve this and do something on (for example) post_load, > > > > we still have a problem: device state is migrated after RAM. This > > > > means QEMU will check for TSC scaling and abort migration very > > > > late. > > > > > > > > We could solve that by manually registering a SaveVMHandler that > > > > will send the TSC frequency on save_live_setup, so migration is > > > > aborted earlier. > > > > > > > > But: this sounds like just a complex hack to work around the real > > > > problems: > > > > > > > > 1) TSC frequency is guest-visible, and anything that affects > > > > guest ABI should depend on the VM configuration, not on host > > > > capabilities; > > > > > > Well not really: the TSC frequency where the guest starts > > > is the frequency the guest software expects. > > > So it does depend on host capabilities. > > > > Could you explain where this expectation comes from? I can see > > multiple cases where choosing the TSC frequency where the VM > > starts is not the best option. > > 1. Boot guest. > 2. Calibrate TSC. > 3. Use delay() with TSC calibration above, or > use TSC to measure the passage of time (TSC clock > in userspace). If TSC scaling is available, using a different frequency should be safe, shouldn't it? Otherwise, migrating with TSC scaling wouldn't be safe either. Anyway: I don't disagree the starting host frequency is a good default. It is. But I don't think it's the best option on all cases. > > > I am considering two possible scenarios below. You probably have > > another scenario in mind, so it would be useful if you could > > describe it so we can consider possible solutions. > > > > > > Scenario 1: > > > > You have two hosts: A and B, both with TSC scaling. They have > > different frequencies. The VM can be started on either one of > > them, and can be migrated to either one depending on the policy > > of management software. > > > > Maybe B is a backup host, the VM is expected to run most times on > > host A, and we want to use the TSC frequency from host A every > > time. Maybe it's the opposite and we want to use the frequency of > > B. Maybe we want to use the lowest frequency of both, maybe the > > highest one. We (QEMU developers) can recommend policy to libvirt > > developers, but a given QEMU process doesn't have information to > > decide what's best. > > I can't see any practical scenario here, you will always want > to start with TSC frequency of the host where the VM was started. > > If i am mistaken, please describe a practical case. > > (If a practical scenario comes up, and there is a use-case > for setting the TSC frequency on startup, lets say > a Windows VM which fails to boot if the TSC frequency > is too high, then it should be supported... But the > only known scenario to me, applying to 99.999% of cases, > is to expose the TSC frequency where the guest booted at). I don't have any specific case: my point is that I can't tell what's the best frequency if I don't know where the hosts are expected to be migrated to. You claim that using the starting host frequency is the best option on the vast majority of cases. Maybe it's true, and it would be a good default. The only problem is that this default affects migratability: > > > Scenario 2: > > > > Host A has TSC scaling, host B doesn't have TSC scaling. We want > > to be able to start the VM on host A, and migrate to B. In this > > case, the only possible solution is to use B's frequency when > > starting the VM. The QEMU process doesn't have enough information > > to make that decision. > > That is a good point. But again, its a special case and > should be supported by -cpu xxx,tsc-frequency=zzzz. > > However, for the vast majority of 99.999% cases, the issue > can be handled entirely in QEMU, without libvirt involvement, > and without adding extra steps to the management software. I agree it should cover most cases. The only problem here is that it can break migration in unexpected ways. Then my point is: assuming that libvirt will prefer to require explicit TSC frequency configuration to enable invtsc migration (instead of getting unpredictable migration compatibility), is the added complexity to migration code worth the effort, if choosing an explicit frequency is safer and more predictable? I believe this is where we disagree. > > > > > 2) Setting TSC frequency depending on the host will make > > > > migratability unpredictable for management software: the same > > > > VM config could be migratable to host A when started on host > > > > B, but not migratable to host A when started on host C. > > > > > > Well, just check the frequency. > > > > What do you mean by "just check the frequency"? What exactly > > should management software do? > > VM booted on host-A can be migrated to host-B if TSC > frequency matches. Except when TSC scaling is available. Then you may or may not migrate from A to B, and you don't know that unless you actually try to migrate. > > > Whatever management software do, if you just use the source host > > frequency you can get into a situation where you can start a VM > > on host A and migrate it to B, but can't start the VM on host B > > and migrate it to A. This would be confusing for users, and > > probably break assumptions of existing management software. > > Well this is a side effect of invariant TSC and migration. > > Look, i agree with all your points, but my point is this: i personally > prefer to handle the 99.999% case, which is the TSC frequency exposed is the one > from the host where the guest booted, in QEMU entirely (for example, to > make life easier for people who run qemu manually such as myself). Right. I don't mind not covering 100% of cases. But I worry if the cases not covered by us behave unexpectedly and unpredictably. If we caused a failure every time an unsafe/unsupported config was used, it would be OK. But making the ability to migrate unpredictable is a more serious issue IMO. I believe we both agree about how the final version should behave, but disagree about what is more important in the first version: * My proposal for the first version means: * People would have to configure TSC frequency manually if they want invtsc + migration (until we also provide a mechanism to query the host TSC frequency so management/scripts could choose it as default); * Adding more code to libvirt. * Your proposal for the first version means: * The ability to migrate won't be predictable by libvirt; * Extra complexity on the migration code to ensure we abort migration on mismatch. We seem to be weighting those issues differently. To me, having predictability on migration ability since the first version is more important to me than making configuration easier (on the first version). > > > > > I suggest we allow migration with invtsc if and only if > > > > tsc-frequency is set explicitly by management software. In other > > > > words, apply only patches 1/4 and 2/4 from this series. After > > > > that, we will need libvirt support for configuring tsc-frequency. > > > > > > I don't like that for the following reasons: > > > > > > * It moves low level complexity from QEMU/KVM to libvirt > > > (libvirt has to call KVM_GET_TSC_KHZ from a vcpu thread). > > > > It doesn't. It could ask QEMU for that information (the new > > query-cpu-model-expansion QMP command would allow that). > > > > Or, alternatively, it could just let the user choose the > > frequency. > > Again, you want to expose the host where the VM booted in most > cases (except the ones you list above). > > > It's probably acceptable for many use cases where > > invtsc+migration is important. > > Ok, thats better. Can you add a patch to your series with the steps > as how mgmt software should proceed ? > > > > * It makes it difficult to run QEMU manually (i use QEMU > > > manually all the time). > > > > I believe the set of people that: 1) need invtsc; 2) need > > migration to work; and 3) run QEMU manually will be able to add > > "tsc-frequency=XXX" to the command-line easily. :) > > Ok, so migration is only allowed if tsc-frequency= is specified. > > > > * It requires patching libvirt. > > > > > > In my experience things work better when the functionality is > > > not split across libvirt/qemu. > > > > In my experience things break when management software is the > > only component able to make a decision but we don't provide > > mechanisms to let management software make that decision. > > > > The TSC frequency affects migratability to hosts. Choose the > > wrong frequency, and you might not be able to migrate. Only > > management software knows to which hosts the VM could be migrated > > in the future, and which TSC frequency would allow that. > > True. > > > > Can't this be fixed in QEMU? Just check that destination host supports > > > TSC scaling before migration (or that KVM_GET_TSC_KHZ return value > > > matches on source and destination). > > > > > > > This solves only one use case: where you want to expose the > > starting host TSC frequency to the guest. It doesn't cover any > > scenario where the TSC frequency of the starting host isn't the > > best one. (See the two scenarios I described above) > > True. > > > You seem to have a specific use case in mind. If you describe it, > > we could decide if it's worth the extra migration code complexity > > to deal with invtsc migration without explicit tsc-frequency. By > > now, I am not convinced yet. > > I don't have any specific use case in mind. I'm just trying to > avoid moving complexity to libvirt which in my experience is a > the best thing to do. I think both our proposals make things harder for libvirt in different ways. I just want to make the complexity explicit for libvirt, instead of giving them the illusion of safety (making migration break in ways libvirt can't detect). Anyway, your points are still valid and it would be still useful to do what you propose. I will give it a try on a new version of patch 4/4 that can abort migration earlier. But note that I expect some pushback from other maintainers because of the complexity of the code. If that happens, be aware that I won't be able to justify the added complexity because I would still think it's not worth it. > > > Maybe your use case just needs a explicit "invtsc-migration=on" > > command-line flag without a mechanism to abort migration on > > mismatch? I can't tell. > > Again, there is no special case. > > > Note that even if we follow your suggestion and implement an > > alternative version of patch 4/4 to cover your use case, I will > > strongly recommend libvirt developers to support configuring TSC > > frequency if they want to support invtsc + migration without > > surprising/unpredictable restrictions on migratability. > > Well, alright. If you make the TSC frequency of the host > available to mgmt software as you describe, and write the steps mgmt > software should take, i'm good. I plan to. The problem is that the mechanism to query the host frequency may be unavailable in the first version. > > Thanks for the help Eduardo. >
On Thu, Jan 05, 2017 at 10:19:50AM -0200, Eduardo Habkost wrote: > On Thu, Jan 05, 2017 at 08:48:32AM -0200, Marcelo Tosatti wrote: > > On Wed, Jan 04, 2017 at 11:36:31PM -0200, Eduardo Habkost wrote: > > > I am considering two possible scenarios below. You probably have > > > another scenario in mind, so it would be useful if you could > > > describe it so we can consider possible solutions. > > > > > > > > > Scenario 1: > > > > > > You have two hosts: A and B, both with TSC scaling. They have > > > different frequencies. The VM can be started on either one of > > > them, and can be migrated to either one depending on the policy > > > of management software. > > > > > > Maybe B is a backup host, the VM is expected to run most times on > > > host A, and we want to use the TSC frequency from host A every > > > time. Maybe it's the opposite and we want to use the frequency of > > > B. Maybe we want to use the lowest frequency of both, maybe the > > > highest one. We (QEMU developers) can recommend policy to libvirt > > > developers, but a given QEMU process doesn't have information to > > > decide what's best. > > > > I can't see any practical scenario here, you will always want > > to start with TSC frequency of the host where the VM was started. > > > > If i am mistaken, please describe a practical case. > > > > (If a practical scenario comes up, and there is a use-case > > for setting the TSC frequency on startup, lets say > > a Windows VM which fails to boot if the TSC frequency > > is too high, then it should be supported... But the > > only known scenario to me, applying to 99.999% of cases, > > is to expose the TSC frequency where the guest booted at). > > I don't have any specific case: my point is that I can't tell > what's the best frequency if I don't know where the hosts are > expected to be migrated to. > > You claim that using the starting host frequency is the best > option on the vast majority of cases. Maybe it's true, and it > would be a good default. The only problem is that this default > affects migratability: Yep, this is the same scenario as CPU model choice. While using "host" model may be a reasonable default if all the hosts in your cluster use identical CPUs. If you have a mix of CPUs though, and you care about migration, you'll want to determine a common denominator instead. The same applies to TSC frequency choice. So from that POV, the libvirt capabilities XML for a host needs to have a way to report the supported TSC frequency for the host. The mgmt app can then figure out what explicit TSC freq to set for guests that need to be migratable to maximise runnability across the hosts. > > > Scenario 2: > > > > > > Host A has TSC scaling, host B doesn't have TSC scaling. We want > > > to be able to start the VM on host A, and migrate to B. In this > > > case, the only possible solution is to use B's frequency when > > > starting the VM. The QEMU process doesn't have enough information > > > to make that decision. > > > > That is a good point. But again, its a special case and > > should be supported by -cpu xxx,tsc-frequency=zzzz. > > > > However, for the vast majority of 99.999% cases, the issue > > can be handled entirely in QEMU, without libvirt involvement, > > and without adding extra steps to the management software. > > I agree it should cover most cases. The only problem here is that > it can break migration in unexpected ways. > > Then my point is: assuming that libvirt will prefer to require > explicit TSC frequency configuration to enable invtsc migration > (instead of getting unpredictable migration compatibility), is > the added complexity to migration code worth the effort, if > choosing an explicit frequency is safer and more predictable? I > believe this is where we disagree. I believe that if libvirt detects that QEMU supports the 'tsc-frequency' option, then libvirt should set it by default in the XML, if not already set by the mgmt app. That way, libvirt can validate TSC freq comapt for migration before it even launches QEMU in the target host. Regards, Daniel
On Thu, Jan 05, 2017 at 12:33:51PM +0000, Daniel P. Berrange wrote: > On Thu, Jan 05, 2017 at 10:19:50AM -0200, Eduardo Habkost wrote: > > On Thu, Jan 05, 2017 at 08:48:32AM -0200, Marcelo Tosatti wrote: > > > On Wed, Jan 04, 2017 at 11:36:31PM -0200, Eduardo Habkost wrote: [...] > > > > Scenario 2: > > > > > > > > Host A has TSC scaling, host B doesn't have TSC scaling. We want > > > > to be able to start the VM on host A, and migrate to B. In this > > > > case, the only possible solution is to use B's frequency when > > > > starting the VM. The QEMU process doesn't have enough information > > > > to make that decision. > > > > > > That is a good point. But again, its a special case and > > > should be supported by -cpu xxx,tsc-frequency=zzzz. > > > > > > However, for the vast majority of 99.999% cases, the issue > > > can be handled entirely in QEMU, without libvirt involvement, > > > and without adding extra steps to the management software. > > > > I agree it should cover most cases. The only problem here is that > > it can break migration in unexpected ways. > > > > Then my point is: assuming that libvirt will prefer to require > > explicit TSC frequency configuration to enable invtsc migration > > (instead of getting unpredictable migration compatibility), is > > the added complexity to migration code worth the effort, if > > choosing an explicit frequency is safer and more predictable? I > > believe this is where we disagree. > > I believe that if libvirt detects that QEMU supports the 'tsc-frequency' > option, then libvirt should set it by default in the XML, if not already > set by the mgmt app. That way, libvirt can validate TSC freq comapt > for migration before it even launches QEMU in the target host. If you do this unconditionally, you have another problem: if tsc-frequency is set explicitly, migration is only possible if TSC frequency of the destination matches[1], or if TSC scaling is supported by the destination. It's a good idea to set a TSC frequency only if invtsc is enabled explicitly in the config. [1] Currently the frequency needs to match exactly. That's a separate issue: we should probably add a knob to allow a slight variation in TSC frequency (e.g. <1% difference).
On Thu, Jan 05, 2017 at 10:48:57AM -0200, Eduardo Habkost wrote: > On Thu, Jan 05, 2017 at 12:33:51PM +0000, Daniel P. Berrange wrote: > > On Thu, Jan 05, 2017 at 10:19:50AM -0200, Eduardo Habkost wrote: > > > On Thu, Jan 05, 2017 at 08:48:32AM -0200, Marcelo Tosatti wrote: > > > > On Wed, Jan 04, 2017 at 11:36:31PM -0200, Eduardo Habkost wrote: > [...] > > > > > Scenario 2: > > > > > > > > > > Host A has TSC scaling, host B doesn't have TSC scaling. We want > > > > > to be able to start the VM on host A, and migrate to B. In this > > > > > case, the only possible solution is to use B's frequency when > > > > > starting the VM. The QEMU process doesn't have enough information > > > > > to make that decision. > > > > > > > > That is a good point. But again, its a special case and > > > > should be supported by -cpu xxx,tsc-frequency=zzzz. > > > > > > > > However, for the vast majority of 99.999% cases, the issue > > > > can be handled entirely in QEMU, without libvirt involvement, > > > > and without adding extra steps to the management software. > > > > > > I agree it should cover most cases. The only problem here is that > > > it can break migration in unexpected ways. > > > > > > Then my point is: assuming that libvirt will prefer to require > > > explicit TSC frequency configuration to enable invtsc migration > > > (instead of getting unpredictable migration compatibility), is > > > the added complexity to migration code worth the effort, if > > > choosing an explicit frequency is safer and more predictable? I > > > believe this is where we disagree. > > > > I believe that if libvirt detects that QEMU supports the 'tsc-frequency' > > option, then libvirt should set it by default in the XML, if not already > > set by the mgmt app. That way, libvirt can validate TSC freq comapt > > for migration before it even launches QEMU in the target host. > > If you do this unconditionally, you have another problem: if > tsc-frequency is set explicitly, migration is only possible if > TSC frequency of the destination matches[1], or if TSC scaling is > supported by the destination. It's a good idea to set a TSC > frequency only if invtsc is enabled explicitly in the config. If we don't set tsc-frequency and the TSC frequency doesn't match, does that mean the guest migration succeed, but suddenly sees different TSC frequency ? I guess we we allowed that historically we can't break that now, so setting it only if invtsc is set seems reasonable. > > [1] Currently the frequency needs to match exactly. That's a > separate issue: we should probably add a knob to allow a slight > variation in TSC frequency (e.g. <1% difference). Regards, Daniel
On Thu, Jan 05, 2017 at 01:00:32PM +0000, Daniel P. Berrange wrote: > On Thu, Jan 05, 2017 at 10:48:57AM -0200, Eduardo Habkost wrote: > > On Thu, Jan 05, 2017 at 12:33:51PM +0000, Daniel P. Berrange wrote: > > > On Thu, Jan 05, 2017 at 10:19:50AM -0200, Eduardo Habkost wrote: > > > > On Thu, Jan 05, 2017 at 08:48:32AM -0200, Marcelo Tosatti wrote: > > > > > On Wed, Jan 04, 2017 at 11:36:31PM -0200, Eduardo Habkost wrote: > > [...] > > > > > > Scenario 2: > > > > > > > > > > > > Host A has TSC scaling, host B doesn't have TSC scaling. We want > > > > > > to be able to start the VM on host A, and migrate to B. In this > > > > > > case, the only possible solution is to use B's frequency when > > > > > > starting the VM. The QEMU process doesn't have enough information > > > > > > to make that decision. > > > > > > > > > > That is a good point. But again, its a special case and > > > > > should be supported by -cpu xxx,tsc-frequency=zzzz. > > > > > > > > > > However, for the vast majority of 99.999% cases, the issue > > > > > can be handled entirely in QEMU, without libvirt involvement, > > > > > and without adding extra steps to the management software. > > > > > > > > I agree it should cover most cases. The only problem here is that > > > > it can break migration in unexpected ways. > > > > > > > > Then my point is: assuming that libvirt will prefer to require > > > > explicit TSC frequency configuration to enable invtsc migration > > > > (instead of getting unpredictable migration compatibility), is > > > > the added complexity to migration code worth the effort, if > > > > choosing an explicit frequency is safer and more predictable? I > > > > believe this is where we disagree. > > > > > > I believe that if libvirt detects that QEMU supports the 'tsc-frequency' > > > option, then libvirt should set it by default in the XML, if not already > > > set by the mgmt app. That way, libvirt can validate TSC freq comapt > > > for migration before it even launches QEMU in the target host. > > > > If you do this unconditionally, you have another problem: if > > tsc-frequency is set explicitly, migration is only possible if > > TSC frequency of the destination matches[1], or if TSC scaling is > > supported by the destination. It's a good idea to set a TSC > > frequency only if invtsc is enabled explicitly in the config. > > If we don't set tsc-frequency and the TSC frequency doesn't > match, does that mean the guest migration succeed, but suddenly > sees different TSC frequency ? If TSC scaling is unavailable, yes. If the destination host supports TSC scaling, we automatically keep the original TSC frequency on migration. > > I guess we we allowed that historically we can't break that > now, so setting it only if invtsc is set seems reasonable. I don't think we really had a choice, KVM would be much less useful if we didn't allow migration between hosts with different frequencies. > > > > > [1] Currently the frequency needs to match exactly. That's a > > separate issue: we should probably add a knob to allow a slight > > variation in TSC frequency (e.g. <1% difference). > > Regards, > Daniel > -- > |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| > |: http://libvirt.org -o- http://virt-manager.org :| > |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|
On Thu, Jan 05, 2017 at 10:19:50AM -0200, Eduardo Habkost wrote: > On Thu, Jan 05, 2017 at 08:48:32AM -0200, Marcelo Tosatti wrote: > > On Wed, Jan 04, 2017 at 11:36:31PM -0200, Eduardo Habkost wrote: > > > On Wed, Jan 04, 2017 at 08:26:27PM -0200, Marcelo Tosatti wrote: > > > > On Wed, Jan 04, 2017 at 05:59:17PM -0200, Eduardo Habkost wrote: > > > > > On Wed, Jan 04, 2017 at 11:39:16AM -0200, Eduardo Habkost wrote: > > > > > > On Wed, Jan 04, 2017 at 09:56:56AM -0200, Marcelo Tosatti wrote: > > > > > > > On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: > > > > > > > > Instead of blocking migration on the source when invtsc is > > > > > > > > enabled, rely on the migration destination to ensure there's no > > > > > > > > TSC frequency mismatch. > > > > > > > > > > > > > > > > We can't allow migration unconditionally because we don't know if > > > > > > > > the destination is a QEMU version that is really going to ensure > > > > > > > > there's no TSC frequency mismatch. To ensure we are migrating to > > > > > > > > a destination that won't ignore SET_TSC_KHZ errors, allow invtsc > > > > > > > > migration only on pc-*-2.9 and newer. > > > > > > > > > > > > > > > > Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> > > > > > [...] > > > > > > > > @@ -2655,12 +2656,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > > > > > > > > } > > > > > > > > > > > > > > > > if (level == KVM_PUT_FULL_STATE) { > > > > > > > > - /* We don't check for kvm_arch_set_tsc_khz() errors here, > > > > > > > > - * because TSC frequency mismatch shouldn't abort migration, > > > > > > > > - * unless the user explicitly asked for a more strict TSC > > > > > > > > - * setting (e.g. using an explicit "tsc-freq" option). > > > > > > > > + /* Migration TSC frequency mismatch is fatal only if we are > > > > > > > > + * actually reporting Invariant TSC to the guest. > > > > > > > > */ > > > > > > > > - kvm_arch_set_tsc_khz(cpu); > > > > > > > > + ret = kvm_arch_set_tsc_khz(cpu); > > > > > > > > + if ((x86_cpu->env.features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && > > > > > > > > + ret < 0) { > > > > > > > > + return ret; > > > > > > > > + } > > > > > > > > } > > > > > > > > > > > > > > Will the guest continue in the source in this case? > > > > > > > > > > > > > > I think this is past the point where migration has been declared > > > > > > > successful. > > > > > > > > > > > > > > Otherwise looks good. > > > > > > > > > > > > Good point. I will make additional tests and see if there's some > > > > > > other place where the kvm_arch_set_tsc_khz() call can be moved > > > > > > to. > > > > > > > > > > So, if we solve this and do something on (for example) post_load, > > > > > we still have a problem: device state is migrated after RAM. This > > > > > means QEMU will check for TSC scaling and abort migration very > > > > > late. > > > > > > > > > > We could solve that by manually registering a SaveVMHandler that > > > > > will send the TSC frequency on save_live_setup, so migration is > > > > > aborted earlier. > > > > > > > > > > But: this sounds like just a complex hack to work around the real > > > > > problems: > > > > > > > > > > 1) TSC frequency is guest-visible, and anything that affects > > > > > guest ABI should depend on the VM configuration, not on host > > > > > capabilities; > > > > > > > > Well not really: the TSC frequency where the guest starts > > > > is the frequency the guest software expects. > > > > So it does depend on host capabilities. > > > > > > Could you explain where this expectation comes from? I can see > > > multiple cases where choosing the TSC frequency where the VM > > > starts is not the best option. > > > > 1. Boot guest. > > 2. Calibrate TSC. > > 3. Use delay() with TSC calibration above, or > > use TSC to measure the passage of time (TSC clock > > in userspace). > > If TSC scaling is available, using a different frequency should > be safe, shouldn't it? Otherwise, migrating with TSC scaling > wouldn't be safe either. Yes, but if you don't have TSC scaling, you have to boot with the host frequency. > Anyway: I don't disagree the starting host frequency is a good > default. It is. But I don't think it's the best option on all > cases. Agree. > > > > > I am considering two possible scenarios below. You probably have > > > another scenario in mind, so it would be useful if you could > > > describe it so we can consider possible solutions. > > > > > > > > > Scenario 1: > > > > > > You have two hosts: A and B, both with TSC scaling. They have > > > different frequencies. The VM can be started on either one of > > > them, and can be migrated to either one depending on the policy > > > of management software. > > > > > > Maybe B is a backup host, the VM is expected to run most times on > > > host A, and we want to use the TSC frequency from host A every > > > time. Maybe it's the opposite and we want to use the frequency of > > > B. Maybe we want to use the lowest frequency of both, maybe the > > > highest one. We (QEMU developers) can recommend policy to libvirt > > > developers, but a given QEMU process doesn't have information to > > > decide what's best. > > > > I can't see any practical scenario here, you will always want > > to start with TSC frequency of the host where the VM was started. > > > > If i am mistaken, please describe a practical case. > > > > (If a practical scenario comes up, and there is a use-case > > for setting the TSC frequency on startup, lets say > > a Windows VM which fails to boot if the TSC frequency > > is too high, then it should be supported... But the > > only known scenario to me, applying to 99.999% of cases, > > is to expose the TSC frequency where the guest booted at). > > I don't have any specific case: my point is that I can't tell > what's the best frequency if I don't know where the hosts are > expected to be migrated to. > > You claim that using the starting host frequency is the best > option on the vast majority of cases. Maybe it's true, and it > would be a good default. The only problem is that this default > affects migratability: > > > > > > Scenario 2: > > > > > > Host A has TSC scaling, host B doesn't have TSC scaling. We want > > > to be able to start the VM on host A, and migrate to B. In this > > > case, the only possible solution is to use B's frequency when > > > starting the VM. The QEMU process doesn't have enough information > > > to make that decision. > > > > That is a good point. But again, its a special case and > > should be supported by -cpu xxx,tsc-frequency=zzzz. > > > > However, for the vast majority of 99.999% cases, the issue > > can be handled entirely in QEMU, without libvirt involvement, > > and without adding extra steps to the management software. > > I agree it should cover most cases. The only problem here is that > it can break migration in unexpected ways. > > Then my point is: assuming that libvirt will prefer to require > explicit TSC frequency configuration to enable invtsc migration > (instead of getting unpredictable migration compatibility), is > the added complexity to migration code worth the effort, if > choosing an explicit frequency is safer and more predictable? I > believe this is where we disagree. > > > > > > > > 2) Setting TSC frequency depending on the host will make > > > > > migratability unpredictable for management software: the same > > > > > VM config could be migratable to host A when started on host > > > > > B, but not migratable to host A when started on host C. > > > > > > > > Well, just check the frequency. > > > > > > What do you mean by "just check the frequency"? What exactly > > > should management software do? > > > > VM booted on host-A can be migrated to host-B if TSC > > frequency matches. > > Except when TSC scaling is available. Then you may or may not > migrate from A to B, and you don't know that unless you actually > try to migrate. > > > > > > Whatever management software do, if you just use the source host > > > frequency you can get into a situation where you can start a VM > > > on host A and migrate it to B, but can't start the VM on host B > > > and migrate it to A. This would be confusing for users, and > > > probably break assumptions of existing management software. > > > > Well this is a side effect of invariant TSC and migration. > > > > Look, i agree with all your points, but my point is this: i personally > > prefer to handle the 99.999% case, which is the TSC frequency exposed is the one > > from the host where the guest booted, in QEMU entirely (for example, to > > make life easier for people who run qemu manually such as myself). > > Right. I don't mind not covering 100% of cases. But I worry if > the cases not covered by us behave unexpectedly and > unpredictably. If we caused a failure every time an > unsafe/unsupported config was used, it would be OK. But making > the ability to migrate unpredictable is a more serious issue IMO. > > I believe we both agree about how the final version should > behave, but disagree about what is more important in the first > version: > > * My proposal for the first version means: > * People would have to configure TSC frequency manually if > they want invtsc + migration (until we also provide a > mechanism to query the host TSC frequency so > management/scripts could choose it as default); > * Adding more code to libvirt. > * Your proposal for the first version means: > * The ability to migrate won't be predictable by libvirt; > * Extra complexity on the migration code to ensure > we abort migration on mismatch. > > We seem to be weighting those issues differently. To me, having > predictability on migration ability since the first version is > more important to me than making configuration easier (on the > first version). > > > > > > > > I suggest we allow migration with invtsc if and only if > > > > > tsc-frequency is set explicitly by management software. In other > > > > > words, apply only patches 1/4 and 2/4 from this series. After > > > > > that, we will need libvirt support for configuring tsc-frequency. > > > > > > > > I don't like that for the following reasons: > > > > > > > > * It moves low level complexity from QEMU/KVM to libvirt > > > > (libvirt has to call KVM_GET_TSC_KHZ from a vcpu thread). > > > > > > It doesn't. It could ask QEMU for that information (the new > > > query-cpu-model-expansion QMP command would allow that). > > > > > > Or, alternatively, it could just let the user choose the > > > frequency. > > > > Again, you want to expose the host where the VM booted in most > > cases (except the ones you list above). > > > > > It's probably acceptable for many use cases where > > > invtsc+migration is important. > > > > Ok, thats better. Can you add a patch to your series with the steps > > as how mgmt software should proceed ? > > > > > > * It makes it difficult to run QEMU manually (i use QEMU > > > > manually all the time). > > > > > > I believe the set of people that: 1) need invtsc; 2) need > > > migration to work; and 3) run QEMU manually will be able to add > > > "tsc-frequency=XXX" to the command-line easily. :) > > > > Ok, so migration is only allowed if tsc-frequency= is specified. > > > > > > * It requires patching libvirt. > > > > > > > > In my experience things work better when the functionality is > > > > not split across libvirt/qemu. > > > > > > In my experience things break when management software is the > > > only component able to make a decision but we don't provide > > > mechanisms to let management software make that decision. > > > > > > The TSC frequency affects migratability to hosts. Choose the > > > wrong frequency, and you might not be able to migrate. Only > > > management software knows to which hosts the VM could be migrated > > > in the future, and which TSC frequency would allow that. > > > > True. > > > > > > Can't this be fixed in QEMU? Just check that destination host supports > > > > TSC scaling before migration (or that KVM_GET_TSC_KHZ return value > > > > matches on source and destination). > > > > > > > > > > This solves only one use case: where you want to expose the > > > starting host TSC frequency to the guest. It doesn't cover any > > > scenario where the TSC frequency of the starting host isn't the > > > best one. (See the two scenarios I described above) > > > > True. > > > > > You seem to have a specific use case in mind. If you describe it, > > > we could decide if it's worth the extra migration code complexity > > > to deal with invtsc migration without explicit tsc-frequency. By > > > now, I am not convinced yet. > > > > I don't have any specific use case in mind. I'm just trying to > > avoid moving complexity to libvirt which in my experience is a > > the best thing to do. > > I think both our proposals make things harder for libvirt in > different ways. I just want to make the complexity explicit for > libvirt, instead of giving them the illusion of safety (making > migration break in ways libvirt can't detect). > > Anyway, your points are still valid and it would be still useful > to do what you propose. I will give it a try on a new version of > patch 4/4 that can abort migration earlier. But note that I > expect some pushback from other maintainers because of the > complexity of the code. If that happens, be aware that I won't be > able to justify the added complexity because I would still think > it's not worth it. > > > > > > Maybe your use case just needs a explicit "invtsc-migration=on" > > > command-line flag without a mechanism to abort migration on > > > mismatch? I can't tell. > > > > Again, there is no special case. > > > > > Note that even if we follow your suggestion and implement an > > > alternative version of patch 4/4 to cover your use case, I will > > > strongly recommend libvirt developers to support configuring TSC > > > frequency if they want to support invtsc + migration without > > > surprising/unpredictable restrictions on migratability. > > > > Well, alright. If you make the TSC frequency of the host > > available to mgmt software as you describe, and write the steps mgmt > > software should take, i'm good. > > I plan to. The problem is that the mechanism to query the host > frequency may be unavailable in the first version. Well just export KVM_GET_TSC_KHZ in a QMP command right? Its pretty easy. Let me know if you need any help coding or testing.
On Fri, Jan 06, 2017 at 08:31:27AM -0200, Marcelo Tosatti wrote: > On Thu, Jan 05, 2017 at 10:19:50AM -0200, Eduardo Habkost wrote: > > On Thu, Jan 05, 2017 at 08:48:32AM -0200, Marcelo Tosatti wrote: > > > On Wed, Jan 04, 2017 at 11:36:31PM -0200, Eduardo Habkost wrote: > > > > On Wed, Jan 04, 2017 at 08:26:27PM -0200, Marcelo Tosatti wrote: > > > > > On Wed, Jan 04, 2017 at 05:59:17PM -0200, Eduardo Habkost wrote: > > > > > > On Wed, Jan 04, 2017 at 11:39:16AM -0200, Eduardo Habkost wrote: > > > > > > > On Wed, Jan 04, 2017 at 09:56:56AM -0200, Marcelo Tosatti wrote: > > > > > > > > On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: [...] > > > > > Can't this be fixed in QEMU? Just check that destination host supports > > > > > TSC scaling before migration (or that KVM_GET_TSC_KHZ return value > > > > > matches on source and destination). > > > > > > > > > > > > > This solves only one use case: where you want to expose the > > > > starting host TSC frequency to the guest. It doesn't cover any > > > > scenario where the TSC frequency of the starting host isn't the > > > > best one. (See the two scenarios I described above) > > > > > > True. > > > > > > > You seem to have a specific use case in mind. If you describe it, > > > > we could decide if it's worth the extra migration code complexity > > > > to deal with invtsc migration without explicit tsc-frequency. By > > > > now, I am not convinced yet. > > > > > > I don't have any specific use case in mind. I'm just trying to > > > avoid moving complexity to libvirt which in my experience is a > > > the best thing to do. > > > > I think both our proposals make things harder for libvirt in > > different ways. I just want to make the complexity explicit for > > libvirt, instead of giving them the illusion of safety (making > > migration break in ways libvirt can't detect). > > > > Anyway, your points are still valid and it would be still useful > > to do what you propose. I will give it a try on a new version of > > patch 4/4 that can abort migration earlier. But note that I > > expect some pushback from other maintainers because of the > > complexity of the code. If that happens, be aware that I won't be > > able to justify the added complexity because I would still think > > it's not worth it. I tried to move the destination TSC-scaling check to post_load, and the bad news is that post_load is also too late to abort migration (destination aborts, but source shows migration as completed). I'm CCing Juan, Amit and David to see if they have any suggestion. Summarizing the problem for them: on some cases we want to allow migration only if the destination host has a matching TSC frequency or if TSC scaling is supported by the destination host. I don't know what's the best way to implement that check. We could either: * Send TSC frequency/scaling info from the the source host, and make the source host abort migration. Is there a good mechanism for that? * Make the destination host abort migration. I don't know if there's a safe mechanism for that. While we figure that out, I will submit v2 without patches 3/4 and 4/4, because patch 2/2 (allowing migration if tsc-freq is explicitly set) is simple and useful. After that, we can add: * The mechanism to query the host TSC frequency (see below) * The mechanism you asked for, to allow migration without explicit TSC frequency if we know the destination host supports TSC scaling or has a matching TSC frequency (more complex, and maybe unnecessary if we implement the previous item) > > > > > > > > > Maybe your use case just needs a explicit "invtsc-migration=on" > > > > command-line flag without a mechanism to abort migration on > > > > mismatch? I can't tell. > > > > > > Again, there is no special case. > > > > > > > Note that even if we follow your suggestion and implement an > > > > alternative version of patch 4/4 to cover your use case, I will > > > > strongly recommend libvirt developers to support configuring TSC > > > > frequency if they want to support invtsc + migration without > > > > surprising/unpredictable restrictions on migratability. > > > > > > Well, alright. If you make the TSC frequency of the host > > > available to mgmt software as you describe, and write the steps mgmt > > > software should take, i'm good. > > > > I plan to. The problem is that the mechanism to query the host > > frequency may be unavailable in the first version. > > Well just export KVM_GET_TSC_KHZ in a QMP command right? Its pretty > easy. I plan to expose it as part of query-cpu-model-expansion (work in progress, right now) when querying the "host" CPU model. > > Let me know if you need any help coding or testing. Thanks!
On Fri, Jan 06, 2017 at 08:31:27AM -0200, Marcelo Tosatti wrote: [...] > > > > > > > Maybe your use case just needs a explicit "invtsc-migration=on" > > > > command-line flag without a mechanism to abort migration on > > > > mismatch? I can't tell. > > > > > > Again, there is no special case. > > > > > > > Note that even if we follow your suggestion and implement an > > > > alternative version of patch 4/4 to cover your use case, I will > > > > strongly recommend libvirt developers to support configuring TSC > > > > frequency if they want to support invtsc + migration without > > > > surprising/unpredictable restrictions on migratability. > > > > > > Well, alright. If you make the TSC frequency of the host > > > available to mgmt software as you describe, and write the steps mgmt > > > software should take, i'm good. > > > > I plan to. The problem is that the mechanism to query the host > > frequency may be unavailable in the first version. > > Well just export KVM_GET_TSC_KHZ in a QMP command right? Its pretty > easy. > > Let me know if you need any help coding or testing. I just found out that KVM doesn't provide something that QEMU and libvirt need: the value of kvm_max_guest_tsc_khz. Without it, we have no way to know if a given VM is really migratable to a host. Could we add a KVM_CAP_MAX_TSC_KHZ capability for that?
* Eduardo Habkost (ehabkost@redhat.com) wrote: > On Fri, Jan 06, 2017 at 08:31:27AM -0200, Marcelo Tosatti wrote: > > On Thu, Jan 05, 2017 at 10:19:50AM -0200, Eduardo Habkost wrote: > > > On Thu, Jan 05, 2017 at 08:48:32AM -0200, Marcelo Tosatti wrote: > > > > On Wed, Jan 04, 2017 at 11:36:31PM -0200, Eduardo Habkost wrote: > > > > > On Wed, Jan 04, 2017 at 08:26:27PM -0200, Marcelo Tosatti wrote: > > > > > > On Wed, Jan 04, 2017 at 05:59:17PM -0200, Eduardo Habkost wrote: > > > > > > > On Wed, Jan 04, 2017 at 11:39:16AM -0200, Eduardo Habkost wrote: > > > > > > > > On Wed, Jan 04, 2017 at 09:56:56AM -0200, Marcelo Tosatti wrote: > > > > > > > > > On Tue, Dec 27, 2016 at 05:21:20PM -0200, Eduardo Habkost wrote: > [...] > > > > > > Can't this be fixed in QEMU? Just check that destination host supports > > > > > > TSC scaling before migration (or that KVM_GET_TSC_KHZ return value > > > > > > matches on source and destination). > > > > > > > > > > > > > > > > This solves only one use case: where you want to expose the > > > > > starting host TSC frequency to the guest. It doesn't cover any > > > > > scenario where the TSC frequency of the starting host isn't the > > > > > best one. (See the two scenarios I described above) > > > > > > > > True. > > > > > > > > > You seem to have a specific use case in mind. If you describe it, > > > > > we could decide if it's worth the extra migration code complexity > > > > > to deal with invtsc migration without explicit tsc-frequency. By > > > > > now, I am not convinced yet. > > > > > > > > I don't have any specific use case in mind. I'm just trying to > > > > avoid moving complexity to libvirt which in my experience is a > > > > the best thing to do. > > > > > > I think both our proposals make things harder for libvirt in > > > different ways. I just want to make the complexity explicit for > > > libvirt, instead of giving them the illusion of safety (making > > > migration break in ways libvirt can't detect). > > > > > > Anyway, your points are still valid and it would be still useful > > > to do what you propose. I will give it a try on a new version of > > > patch 4/4 that can abort migration earlier. But note that I > > > expect some pushback from other maintainers because of the > > > complexity of the code. If that happens, be aware that I won't be > > > able to justify the added complexity because I would still think > > > it's not worth it. > > I tried to move the destination TSC-scaling check to post_load, > and the bad news is that post_load is also too late to abort > migration (destination aborts, but source shows migration as > completed). > > I'm CCing Juan, Amit and David to see if they have any > suggestion. Yes, that's not too surprising; we don't have any 'finished' flag in the stream; it's just a one directional stream where the source claims to have finished after it's stuffed the last byte into the socket (I think before it's closed it). > Summarizing the problem for them: on some cases we want to allow > migration only if the destination host has a matching TSC > frequency or if TSC scaling is supported by the destination host. > I don't know what's the best way to implement that check. We > could either: > > * Send TSC frequency/scaling info from the the source host, and > make the source host abort migration. Is there a good mechanism > for that? Normal migration is just a 1-directional stream, so we dont have a way to get the TSC frequency back to the source host. > * Make the destination host abort migration. I don't know if > there's a safe mechanism for that. The only safe way to do it is to do it early; preferably before the RAM so that you know (via buffer depths) that the source can't have got to the end yet (and that's assuming these are constant values that are checked, not something guest configurable). We do have a 'global_state' in migration.c - but it's just for migration state rather than device state. I don't think we currently have any hook for sending device state early; the setup phase hooks on devices are only called for iterative devices (i.e. RAM/block etc); I think to do what you need we need to either add a subsection into vmstate_globalstate in migration.c (which we somehow have to make device/machine independent) or we have to find a way to have setup sections for non-iterative devices. One worry; is this going to affect loading of snapshots on the same host? If I take a snapshot, and then come back on a nice warm day is the TSC on the host going to be sufficiently different the snapshot wont load? Dave > > > > While we figure that out, I will submit v2 without patches 3/4 > and 4/4, because patch 2/2 (allowing migration if tsc-freq is > explicitly set) is simple and useful. After that, we can add: > > * The mechanism to query the host TSC frequency (see below) > * The mechanism you asked for, to allow migration without > explicit TSC frequency if we know the destination host supports > TSC scaling or has a matching TSC frequency (more complex, and > maybe unnecessary if we implement the previous item) > > > > > > > > > > > > > > Maybe your use case just needs a explicit "invtsc-migration=on" > > > > > command-line flag without a mechanism to abort migration on > > > > > mismatch? I can't tell. > > > > > > > > Again, there is no special case. > > > > > > > > > Note that even if we follow your suggestion and implement an > > > > > alternative version of patch 4/4 to cover your use case, I will > > > > > strongly recommend libvirt developers to support configuring TSC > > > > > frequency if they want to support invtsc + migration without > > > > > surprising/unpredictable restrictions on migratability. > > > > > > > > Well, alright. If you make the TSC frequency of the host > > > > available to mgmt software as you describe, and write the steps mgmt > > > > software should take, i'm good. > > > > > > I plan to. The problem is that the mechanism to query the host > > > frequency may be unavailable in the first version. > > > > Well just export KVM_GET_TSC_KHZ in a QMP command right? Its pretty > > easy. > > I plan to expose it as part of query-cpu-model-expansion (work in > progress, right now) when querying the "host" CPU model. > > > > > Let me know if you need any help coding or testing. > > Thanks! > > -- > Eduardo -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 08/01/2017 21:28, Eduardo Habkost wrote: >> Well just export KVM_GET_TSC_KHZ in a QMP command right? Its pretty >> easy. >> >> Let me know if you need any help coding or testing. > I just found out that KVM doesn't provide something that QEMU and > libvirt need: the value of kvm_max_guest_tsc_khz. Without it, we > have no way to know if a given VM is really migratable to a host. > > Could we add a KVM_CAP_MAX_TSC_KHZ capability for that? The ratio is really quite high, 256x the host frequency for AMD and 65536x for Intel. Anything below 2^32 Hz (above that, there would probably be other failures) is safe. Paolo
On 05/01/2017 11:48, Marcelo Tosatti wrote: >> Host A has TSC scaling, host B doesn't have TSC scaling. We want >> to be able to start the VM on host A, and migrate to B. In this >> case, the only possible solution is to use B's frequency when >> starting the VM. The QEMU process doesn't have enough information >> to make that decision. > That is a good point. But again, its a special case and > should be supported by -cpu xxx,tsc-frequency=zzzz. I don't think this is a scenario that can work reliably. The computed TSC frequency may vary by 0.5% or so on every boot (e.g. you may get 2497000 kHz or 2511000 kHz for a 2.5 GHz TSC). You can start the VM on host A, reboot host B, and then you'll be unable to migrate. Paolo
On 05/01/2017 14:00, Daniel P. Berrange wrote: >> If you do this unconditionally, you have another problem: if >> tsc-frequency is set explicitly, migration is only possible if >> TSC frequency of the destination matches[1], or if TSC scaling is >> supported by the destination. It's a good idea to set a TSC >> frequency only if invtsc is enabled explicitly in the config. > > If we don't set tsc-frequency and the TSC frequency doesn't > match, does that mean the guest migration succeed, but suddenly > sees different TSC frequency ? That's the reason why kvmclock exists (or more precisely, the reason why kvmclock is still useful even when hosts have invtsc). Paolo > I guess we we allowed that historically we can't break that > now, so setting it only if invtsc is set seems reasonable.
On Tue, Jan 10, 2017 at 05:36:48PM +0100, Paolo Bonzini wrote: > > > On 05/01/2017 11:48, Marcelo Tosatti wrote: > >> Host A has TSC scaling, host B doesn't have TSC scaling. We want > >> to be able to start the VM on host A, and migrate to B. In this > >> case, the only possible solution is to use B's frequency when > >> starting the VM. The QEMU process doesn't have enough information > >> to make that decision. > > That is a good point. But again, its a special case and > > should be supported by -cpu xxx,tsc-frequency=zzzz. > > I don't think this is a scenario that can work reliably. The computed > TSC frequency may vary by 0.5% or so on every boot (e.g. you may get > 2497000 kHz or 2511000 kHz for a 2.5 GHz TSC). You can start the VM on > host A, reboot host B, and then you'll be unable to migrate. Right, so it means invtsc migration without TSC scaling will be possible in practice only if we tolerate a small variance on TSC frequency on migration. The question is: should we? Can we tolerate a 0.5% variance on TSC frequency and still expose invtsc to the guest?
On Mon, Jan 09, 2017 at 03:58:11PM +0100, Paolo Bonzini wrote: > > > On 08/01/2017 21:28, Eduardo Habkost wrote: > >> Well just export KVM_GET_TSC_KHZ in a QMP command right? Its pretty > >> easy. > >> > >> Let me know if you need any help coding or testing. > > I just found out that KVM doesn't provide something that QEMU and > > libvirt need: the value of kvm_max_guest_tsc_khz. Without it, we > > have no way to know if a given VM is really migratable to a host. > > > > Could we add a KVM_CAP_MAX_TSC_KHZ capability for that? > > The ratio is really quite high, 256x the host frequency for AMD and > 65536x for Intel. Anything below 2^32 Hz (above that, there would > probably be other failures) is safe. 2^32 Hz (~4.3 GHz) sounds like a limit likely to be hit by future CPUs. Which kind of failures do you think we could see?
On 11/01/2017 14:26, Eduardo Habkost wrote: > On Mon, Jan 09, 2017 at 03:58:11PM +0100, Paolo Bonzini wrote: >> >> >> On 08/01/2017 21:28, Eduardo Habkost wrote: >>>> Well just export KVM_GET_TSC_KHZ in a QMP command right? Its pretty >>>> easy. >>>> >>>> Let me know if you need any help coding or testing. >>> I just found out that KVM doesn't provide something that QEMU and >>> libvirt need: the value of kvm_max_guest_tsc_khz. Without it, we >>> have no way to know if a given VM is really migratable to a host. >>> >>> Could we add a KVM_CAP_MAX_TSC_KHZ capability for that? >> >> The ratio is really quite high, 256x the host frequency for AMD and >> 65536x for Intel. Anything below 2^32 Hz (above that, there would >> probably be other failures) is safe. > > 2^32 Hz (~4.3 GHz) sounds like a limit likely to be hit by future > CPUs. Which kind of failures do you think we could see? Just integer overflows. Current CPUs have been at 3.9 GHz for a while now. Paolo
On Tue, Jan 10, 2017 at 05:36:48PM +0100, Paolo Bonzini wrote: > > > On 05/01/2017 11:48, Marcelo Tosatti wrote: > >> Host A has TSC scaling, host B doesn't have TSC scaling. We want > >> to be able to start the VM on host A, and migrate to B. In this > >> case, the only possible solution is to use B's frequency when > >> starting the VM. The QEMU process doesn't have enough information > >> to make that decision. > > That is a good point. But again, its a special case and > > should be supported by -cpu xxx,tsc-frequency=zzzz. > > I don't think this is a scenario that can work reliably. The computed > TSC frequency may vary by 0.5% or so on every boot (e.g. you may get > 2497000 kHz or 2511000 kHz for a 2.5 GHz TSC). You can start the VM on > host A, reboot host B, and then you'll be unable to migrate. > > Paolo Including an acceptable error in the comparison seems to be acceptable to work around that case.
On Wed, Jan 18, 2017 at 09:55:43AM -0200, Marcelo Tosatti wrote: > On Tue, Jan 10, 2017 at 05:36:48PM +0100, Paolo Bonzini wrote: > > > > > > On 05/01/2017 11:48, Marcelo Tosatti wrote: > > >> Host A has TSC scaling, host B doesn't have TSC scaling. We want > > >> to be able to start the VM on host A, and migrate to B. In this > > >> case, the only possible solution is to use B's frequency when > > >> starting the VM. The QEMU process doesn't have enough information > > >> to make that decision. > > > That is a good point. But again, its a special case and > > > should be supported by -cpu xxx,tsc-frequency=zzzz. > > > > I don't think this is a scenario that can work reliably. The computed > > TSC frequency may vary by 0.5% or so on every boot (e.g. you may get > > 2497000 kHz or 2511000 kHz for a 2.5 GHz TSC). You can start the VM on > > host A, reboot host B, and then you'll be unable to migrate. > > > > Paolo > > Including an acceptable error in the comparison seems to be > acceptable to work around that case. How much error is acceptable when exposing the invtsc flag to the guest?
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index ceeacca..4270923 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -375,7 +375,12 @@ int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_2_8 \ - HW_COMPAT_2_8 + HW_COMPAT_2_8 \ + {\ + .driver = TYPE_X86_CPU,\ + .property = "invtsc-migration",\ + .value = "off",\ + }, #define PC_COMPAT_2_7 \ HW_COMPAT_2_7 \ diff --git a/target/i386/cpu.h b/target/i386/cpu.h index a7f2f60..ec8cdbc 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1208,6 +1208,7 @@ struct X86CPU { bool expose_kvm; bool migratable; bool host_features; + bool invtsc_migration; uint32_t apic_id; /* if true the CPUID code directly forward host cache leaves to the guest */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c index b0640f1..cc93b81 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3678,6 +3678,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), + DEFINE_PROP_BOOL("invtsc-migration", X86CPU, invtsc_migration, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 6a51399..2c3ee7b 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -962,7 +962,7 @@ int kvm_arch_init_vcpu(CPUState *cs) has_msr_mcg_ext_ctl = has_msr_feature_control = true; } - if (!env->user_tsc_khz) { + if (!cpu->invtsc_migration && !env->user_tsc_khz) { if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && invtsc_mig_blocker == NULL) { /* for migration */ @@ -972,6 +972,7 @@ int kvm_arch_init_vcpu(CPUState *cs) migrate_add_blocker(invtsc_mig_blocker); /* for savevm */ vmstate_x86_cpu.unmigratable = 1; + } } cpuid_data.cpuid.padding = 0; @@ -2655,12 +2656,14 @@ int kvm_arch_put_registers(CPUState *cpu, int level) } if (level == KVM_PUT_FULL_STATE) { - /* We don't check for kvm_arch_set_tsc_khz() errors here, - * because TSC frequency mismatch shouldn't abort migration, - * unless the user explicitly asked for a more strict TSC - * setting (e.g. using an explicit "tsc-freq" option). + /* Migration TSC frequency mismatch is fatal only if we are + * actually reporting Invariant TSC to the guest. */ - kvm_arch_set_tsc_khz(cpu); + ret = kvm_arch_set_tsc_khz(cpu); + if ((x86_cpu->env.features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && + ret < 0) { + return ret; + } } ret = kvm_getput_regs(x86_cpu, 1);
Instead of blocking migration on the source when invtsc is enabled, rely on the migration destination to ensure there's no TSC frequency mismatch. We can't allow migration unconditionally because we don't know if the destination is a QEMU version that is really going to ensure there's no TSC frequency mismatch. To ensure we are migrating to a destination that won't ignore SET_TSC_KHZ errors, allow invtsc migration only on pc-*-2.9 and newer. Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> --- include/hw/i386/pc.h | 7 ++++++- target/i386/cpu.h | 1 + target/i386/cpu.c | 1 + target/i386/kvm.c | 15 +++++++++------ 4 files changed, 17 insertions(+), 7 deletions(-)