Message ID | 1484749457-87117-1-git-send-email-phil@philjordan.eu (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 18/01/2017 15:24, Phil Dennis-Jordan wrote: > --- > target/i386/cpu.c | 1 + > target/i386/cpu.h | 4 ++++ > target/i386/kvm.c | 40 ++++++++++++++++++++++++++++++++-------- > 3 files changed, 37 insertions(+), 8 deletions(-) > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index aba11ae..e5523d4 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -3677,6 +3677,7 @@ static Property x86_cpu_properties[] = { > DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), > DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), > DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), > + DEFINE_PROP_BOOL("vmware-tsc-apic-clocks", X86CPU, vmware_clock_rates, false), Maybe just vmware-cpuid-freq instead? Whatever the choice, please make the bool field in struct X86CPU consistent with the property name (e.g. enable_vmware_cpuid_freq). One issue is that the TSC frequency can change, for example on migration. Telling the guest about the TSC frequency makes little sense if it can change. So the leaf should be conditional on the INVTSC feature (CPUID[0x80000007].EDX bit 8). You can enable this unconditionally for new machine types (i.e. making it true here, and turning it off in include/hw/i386/pc.h's PC_COMPAT_2_8 macro), but only expose it if that bit is also set. > DEFINE_PROP_END_OF_LIST() > }; > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index 6c1902b..1d8590b 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -1213,6 +1213,10 @@ struct X86CPU { > bool host_features; > uint32_t apic_id; > > + /* Enables publishing of TSC increment and Local APIC bus frequencies to > + * the guest OS in CPUID page 0x40000010, the same way that VMWare does. 
*/ > + bool vmware_clock_rates; > + > /* if true the CPUID code directly forward host cache leaves to the guest */ > bool cache_info_passthrough; > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c > index 10a9cd8..7830b3a 100644 > --- a/target/i386/kvm.c > +++ b/target/i386/kvm.c > @@ -778,10 +778,14 @@ int kvm_arch_init_vcpu(CPUState *cs) > } > > if (cpu->expose_kvm) { > + uint32_t kvm_max_page = KVM_CPUID_FEATURES | kvm_base; s/page/leaf/ > + if (cpu->vmware_clock_rates && kvm_base == KVM_CPUID_SIGNATURE) { > + kvm_max_page = MAX(kvm_max_page, KVM_CPUID_SIGNATURE | 0x10); > + } > memcpy(signature, "KVMKVMKVM\0\0\0", 12); > c = &cpuid_data.entries[cpuid_i++]; > c->function = KVM_CPUID_SIGNATURE | kvm_base; > - c->eax = KVM_CPUID_FEATURES | kvm_base; > + c->eax = kvm_max_page; > c->ebx = signature[0]; > c->ecx = signature[1]; > c->edx = signature[2]; > @@ -910,7 +914,6 @@ int kvm_arch_init_vcpu(CPUState *cs) > } > } > > - cpuid_data.cpuid.nent = cpuid_i; > > if (((env->cpuid_version >> 8)&0xF) >= 6 > && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == > @@ -973,12 +976,6 @@ int kvm_arch_init_vcpu(CPUState *cs) > vmstate_x86_cpu.unmigratable = 1; > } > > - cpuid_data.cpuid.padding = 0; > - r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data); > - if (r) { > - return r; > - } > - > r = kvm_arch_set_tsc_khz(cs); > if (r < 0) { > return r; > @@ -998,6 +995,33 @@ int kvm_arch_init_vcpu(CPUState *cs) > } > } > > + if (cpu->vmware_clock_rates) { ^^ Here is where you should also check invtsc. > + if (cpu->expose_kvm I think this should not depend on cpu->expose_kvm. This is not a KVM leaf, it's a vmware leaf; if it were a KVM leaf, it would obey kvm_base. Of course checking kvm_base is still a good idea, to avoid stomping on Hyper-V's CPUID space. Thanks, Paolo > + && kvm_base == KVM_CPUID_SIGNATURE > + && env->tsc_khz != 0) { > + /* Publish TSC and LAPIC resolution on CPUID page 0x40000010 > + * like VMWare for benefit of Darwin guests. 
*/ > + c = &cpuid_data.entries[cpuid_i++]; > + c->function = KVM_CPUID_SIGNATURE | 0x10; > + c->eax = env->tsc_khz; > + /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's > + * APIC_BUS_CYCLE_NS*/ > + c->ebx = 1000000; > + c->ecx = c->edx = 0; > + } else { > + error_report( > + "Warning: VMWare-style TSC/LAPIC clock reporting impossible."); > + } > + } > + > + cpuid_data.cpuid.nent = cpuid_i; > + > + cpuid_data.cpuid.padding = 0; > + r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data); > + if (r) { > + return r; > + } > + > if (has_xsave) { > env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave)); > } > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jan 18, 2017 at 04:05:59PM +0100, Paolo Bonzini wrote: > > > On 18/01/2017 15:24, Phil Dennis-Jordan wrote: > > --- > > target/i386/cpu.c | 1 + > > target/i386/cpu.h | 4 ++++ > > target/i386/kvm.c | 40 ++++++++++++++++++++++++++++++++-------- > > 3 files changed, 37 insertions(+), 8 deletions(-) > > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > > index aba11ae..e5523d4 100644 > > --- a/target/i386/cpu.c > > +++ b/target/i386/cpu.c > > @@ -3677,6 +3677,7 @@ static Property x86_cpu_properties[] = { > > DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), > > DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), > > DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), > > + DEFINE_PROP_BOOL("vmware-tsc-apic-clocks", X86CPU, vmware_clock_rates, false), > > Maybe just vmware-cpuid-freq instead? Whatever the choice, please make > the bool field in struct X86CPU consistent with the property name (e.g. > enable_vmware_cpuid_freq). > > One issue is that the TSC frequency can change, for example on > migration. Telling the guest about the TSC frequency makes little sense > if it can change. > > So the leaf should be conditional on the INVTSC feature > (CPUID[0x80000007].EDX bit 8). You can enable this unconditionally for > new machine types (i.e. making it true here, and turning it off in > include/hw/i386/pc.h's PC_COMPAT_2_8 macro), but only expose it if that > bit is also set. It can be made conditional on (invtsc || cpu->user_tsc_khz). If the TSC frequency is configured explicitly, we know it won't change.
On 18/01/2017 17:02, Phil Dennis-Jordan wrote: > > One issue is that the TSC frequency can change, for example on > migration. Telling the guest about the TSC frequency makes little sense > if it can change. > > That makes sense. Darwin can't handle changing TSC frequencies in any > case, regardless of cpuid leaf 0x40000010. Do I deduce correctly from > the following code (lines 967~977) that this bit inhibits migration > intrinsically, so other than depending on it, I don't need to > specifically disable migration for this option? Correct. > So the leaf should be conditional on the INVTSC feature > (CPUID[0x80000007].EDX bit 8). You can enable this unconditionally for > new machine types (i.e. making it true here, and turning it off in > include/hw/i386/pc.h's PC_COMPAT_2_8 macro), but only expose it if that > bit is also set. > > Sorry, you've lost me here. Would you mind explaining in a little more > detail? What would I be enabling unconditionally? (I'm getting lost on > what the various 'this'/'that'/'it' are referring to.) You enable vmware-cpuid-freq unconditionally. But then you actually publish 0x40000010 only if INVTSC is set. > > > + if (cpu->vmware_clock_rates) { > > ^^ Here is where you should also check invtsc. > > > + if (cpu->expose_kvm > > I think this should not depend on cpu->expose_kvm. This is not a KVM > leaf, it's a vmware leaf; if it were a KVM leaf, it would obey kvm_base. > Of course checking kvm_base is still a good idea, to avoid stomping on > Hyper-V's CPUID space. > > Hmm, my thinking here is that leaf 0x40000000 only is published if kvm > or Hyper-V is exposed. Without 0x40000000, Darwin won't find 0x40000010. Of course you're right, but please add a comment like this: /* Guests depend on 0x40000000 to detect this, so do not expose * it unless that leaf is present. 
*/ Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 18 January 2017 at 17:10, Paolo Bonzini <pbonzini@redhat.com> wrote: > You enable vmware-cpuid-freq unconditionally. But then you actually > publish 0x40000010 only if INVTSC is set. Right, got it, thanks. What about the Hyper-V conflict? That will be vastly more probable if the option is on by default. Don't publish leaf 0x40000010, print the warning and carry on in this case (as the patch does at the moment)? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 18/01/2017 17:16, Phil Dennis-Jordan wrote: > On 18 January 2017 at 17:10, Paolo Bonzini <pbonzini@redhat.com> wrote: >> You enable vmware-cpuid-freq unconditionally. But then you actually >> publish 0x40000010 only if INVTSC is set. > Right, got it, thanks. What about the Hyper-V conflict? That will be > vastly more probable if the option is on by default. Don't publish > leaf 0x40000010, print the warning and carry on in this case (as the > patch does at the moment)? Probably it's simplest to remove the warning altogether. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jan 18, 2017 at 05:04:27PM +0100, Phil Dennis-Jordan wrote: > Thanks for the comments Paolo and Eduardo, > > On 18 January 2017 at 16:05, Paolo Bonzini <pbonzini@redhat.com> wrote: > > > > > + DEFINE_PROP_BOOL("vmware-tsc-apic-clocks", X86CPU, > > vmware_clock_rates, false), > > > > Maybe just vmware-cpuid-freq instead? Whatever the choice, please make > > the bool field in struct X86CPU consistent with the property name (e.g. > > enable_vmware_cpuid_freq). > > > > Sounds good, I've fixed this and the page/leaf terminology mixup for the > next patch iteration. > > One issue is that the TSC frequency can change, for example on > > migration. Telling the guest about the TSC frequency makes little sense > > if it can change. > > > > That makes sense. Darwin can't handle changing TSC frequencies in any case, > regardless of cpuid leaf 0x40000010. Do I deduce correctly from the > following code (lines 967~977) that this bit inhibits migration > intrinsically, so other than depending on it, I don't need to specifically > disable migration for this option? > > if (c && (c->edx & 1<<8) && invtsc_mig_blocker == NULL) { > … > vmstate_x86_cpu.unmigratable = 1; > > (Likewise, it would appear that the user_tsc_khz case Eduardo suggested > already has a migration sanity check in cpu_post_load() too.) user_tsc_khz won't inhibit migration completely, but QEMU will refuse to start if the host TSC frequency isn't an exact match and the host doesn't support TSC scaling. invtsc currently inhibits migration, but I submitted a series last week that will allow migration with invtsc if user_tsc_khz is set. There are also plans to allow very small TSC frequency mismatches with user_tsc_khz set, so migration with user_tsc_khz can be possible when TSC scaling is unavailable. The discussion is at: Subject: [PATCH 0/4] Allow migration with invtsc if there's no frequency mismatch
diff --git a/target/i386/cpu.c b/target/i386/cpu.c index aba11ae..e5523d4 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3677,6 +3677,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), + DEFINE_PROP_BOOL("vmware-tsc-apic-clocks", X86CPU, vmware_clock_rates, false), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 6c1902b..1d8590b 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1213,6 +1213,10 @@ struct X86CPU { bool host_features; uint32_t apic_id; + /* Enables publishing of TSC increment and Local APIC bus frequencies to + * the guest OS in CPUID page 0x40000010, the same way that VMWare does. */ + bool vmware_clock_rates; + /* if true the CPUID code directly forward host cache leaves to the guest */ bool cache_info_passthrough; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 10a9cd8..7830b3a 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -778,10 +778,14 @@ int kvm_arch_init_vcpu(CPUState *cs) } if (cpu->expose_kvm) { + uint32_t kvm_max_page = KVM_CPUID_FEATURES | kvm_base; + if (cpu->vmware_clock_rates && kvm_base == KVM_CPUID_SIGNATURE) { + kvm_max_page = MAX(kvm_max_page, KVM_CPUID_SIGNATURE | 0x10); + } memcpy(signature, "KVMKVMKVM\0\0\0", 12); c = &cpuid_data.entries[cpuid_i++]; c->function = KVM_CPUID_SIGNATURE | kvm_base; - c->eax = KVM_CPUID_FEATURES | kvm_base; + c->eax = kvm_max_page; c->ebx = signature[0]; c->ecx = signature[1]; c->edx = signature[2]; @@ -910,7 +914,6 @@ int kvm_arch_init_vcpu(CPUState *cs) } } - cpuid_data.cpuid.nent = cpuid_i; if (((env->cpuid_version >> 8)&0xF) >= 6 && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == @@ -973,12 +976,6 @@ int kvm_arch_init_vcpu(CPUState *cs) vmstate_x86_cpu.unmigratable = 1; } - cpuid_data.cpuid.padding = 0; - r = kvm_vcpu_ioctl(cs, 
KVM_SET_CPUID2, &cpuid_data); - if (r) { - return r; - } - r = kvm_arch_set_tsc_khz(cs); if (r < 0) { return r; @@ -998,6 +995,33 @@ int kvm_arch_init_vcpu(CPUState *cs) } } + if (cpu->vmware_clock_rates) { + if (cpu->expose_kvm + && kvm_base == KVM_CPUID_SIGNATURE + && env->tsc_khz != 0) { + /* Publish TSC and LAPIC resolution on CPUID page 0x40000010 + * like VMWare for benefit of Darwin guests. */ + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_SIGNATURE | 0x10; + c->eax = env->tsc_khz; + /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's + * APIC_BUS_CYCLE_NS*/ + c->ebx = 1000000; + c->ecx = c->edx = 0; + } else { + error_report( + "Warning: VMWare-style TSC/LAPIC clock reporting impossible."); + } + } + + cpuid_data.cpuid.nent = cpuid_i; + + cpuid_data.cpuid.padding = 0; + r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data); + if (r) { + return r; + } + if (has_xsave) { env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave)); }
This fixes timekeeping of x86-64 Darwin/OS X/macOS guests when using KVM. Darwin/OS X/macOS for x86-64 uses the TSC for timekeeping; it normally calibrates this by querying various clock frequency scaling MSRs. Details depend on the exact CPU model detected. The local APIC timer frequency is extracted from (EFI) firmware. This is problematic in the presence of virtualisation, as the MSRs in question are typically not handled by the hypervisor. VMware (Fusion) advertises TSC and APIC frequency via a custom 0x40000010 CPUID leaf, in the eax and ebx registers respectively. This is documented at https://lwn.net/Articles/301888/ among other places. Darwin/OS X/macOS looks for the generic 0x40000000 hypervisor leaf, and if this indicates via eax that leaf 0x40000010 might be available, that is in turn queried for the two frequencies. This adds a CPU option "vmware-tsc-apic-clocks" to enable the same behaviour when running QEMU with KVM acceleration, if the KVM TSC frequency can be established. The virtualised APIC bus frequency is hardcoded to 1 GHz in KVM (a bus cycle of 1 ns), so ebx of the CPUID leaf is also hardcoded to the corresponding value, 1000000 kHz. Signed-off-by: Phil Dennis-Jordan <phil@philjordan.eu> --- target/i386/cpu.c | 1 + target/i386/cpu.h | 4 ++++ target/i386/kvm.c | 40 ++++++++++++++++++++++++++++++++-------- 3 files changed, 37 insertions(+), 8 deletions(-)