From patchwork Mon Feb 9 20:15:04 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Marcelo Tosatti X-Patchwork-Id: 6281 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n19KFYSL011670 for ; Mon, 9 Feb 2009 20:15:34 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1750767AbZBIUPc (ORCPT ); Mon, 9 Feb 2009 15:15:32 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1750945AbZBIUPb (ORCPT ); Mon, 9 Feb 2009 15:15:31 -0500 Received: from mx2.redhat.com ([66.187.237.31]:59462 "EHLO mx2.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750767AbZBIUPa (ORCPT ); Mon, 9 Feb 2009 15:15:30 -0500 Received: from int-mx2.corp.redhat.com (int-mx2.corp.redhat.com [172.16.27.26]) by mx2.redhat.com (8.13.8/8.13.8) with ESMTP id n19KFUBM013723 for ; Mon, 9 Feb 2009 15:15:30 -0500 Received: from ns3.rdu.redhat.com (ns3.rdu.redhat.com [10.11.255.199]) by int-mx2.corp.redhat.com (8.13.1/8.13.1) with ESMTP id n19KFUvG031558; Mon, 9 Feb 2009 15:15:30 -0500 Received: from amt.cnet (vpn-10-3.str.redhat.com [10.32.10.3]) by ns3.rdu.redhat.com (8.13.8/8.13.8) with ESMTP id n19KFOT6022111; Mon, 9 Feb 2009 15:15:26 -0500 Received: from amt.cnet (amt.cnet [127.0.0.1]) by amt.cnet (Postfix) with ESMTP id 6FECE550002; Mon, 9 Feb 2009 18:15:07 -0200 (BRST) Received: (from marcelo@localhost) by amt.cnet (8.14.3/8.14.3/Submit) id n19KF4kO010939; Mon, 9 Feb 2009 18:15:04 -0200 Date: Mon, 9 Feb 2009 18:15:04 -0200 From: Marcelo Tosatti To: Gerd Hoffmann , Avi Kivity Cc: KVM list Subject: Re: [PATCH] Fix kvmclock on !constant_tsc boxes. 
Message-ID: <20090209201504.GA10603@amt.cnet> References: <4989C7B4.10508@redhat.com> <20090208060856.GC4437@amt.cnet> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20090208060856.GC4437@amt.cnet> User-Agent: Mutt/1.5.18 (2008-05-17) X-Scanned-By: MIMEDefang 2.58 on 172.16.27.26 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org On Sun, Feb 08, 2009 at 04:08:56AM -0200, Marcelo Tosatti wrote: > On Wed, Feb 04, 2009 at 05:52:04PM +0100, Gerd Hoffmann wrote: > > Hi folks, > > > > kvmclock currently falls apart on machines without constant tsc. > > This patch fixes it. Changes: > > > > * keep tsc frequency in a per-cpu variable. > > * handle kvmclock update using a new request flag, thus checking > > whether we need an update each time we enter guest context. > > * use a cpufreq notifier to track frequency changes and force > > kvmclock updates. > > * send ipis to kick cpu out of guest context if needed to make > > sure the guest doesn't see stale values. > > > > cheers, > > Gerd > > ACK for 2.6.29 (but please fix the whitespace breakage). Whitespace fixed version attached. 
Index: kvm/arch/x86/kvm/x86.c =================================================================== --- kvm.orig/arch/x86/kvm/x86.c +++ kvm/arch/x86/kvm/x86.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t hv_clock->tsc_to_system_mul); } +static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); + static void kvm_write_guest_time(struct kvm_vcpu *v) { struct timespec ts; @@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct if ((!vcpu->time_page)) return; - if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { - kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); - vcpu->hv_clock_tsc_khz = tsc_khz; + if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { + kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); + vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); } /* Keep irq disabled to prevent changes to the clock */ @@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); } +static int kvm_request_guest_time_update(struct kvm_vcpu *v) +{ + struct kvm_vcpu_arch *vcpu = &v->arch; + + if (!vcpu->time_page) + return 0; + set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); + return 1; +} + static bool msr_mtrr_valid(unsigned msr) { switch (msr) { @@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu * vcpu->arch.time_page = NULL; } - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); break; } default: @@ -1096,7 +1109,7 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -2640,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vc } EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); +static void bounce_off(void *info) +{ + /* nothing */ +} + +static unsigned int ref_freq; +static unsigned long tsc_khz_ref; + +static int 
kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int i, send_ipi = 0; + + if (!ref_freq) + ref_freq = freq->old; + + if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) + return 0; + if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) + return 0; + per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + + spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (vcpu->cpu != freq->cpu) + continue; + if (!kvm_request_guest_time_update(vcpu)) + continue; + if (vcpu->cpu != smp_processor_id()) + send_ipi++; + } + } + spin_unlock(&kvm_lock); + + if (freq->old < freq->new && send_ipi) { + /* + * We upscale the frequency. Must make sure the guest + * doesn't see old kvmclock values while running with + * the new frequency, otherwise we risk the guest seeing + * time go backwards. + * + * In case we update the frequency for another cpu + * (which might be in guest context) send an interrupt + * to kick the cpu out of guest context. Next time + * guest context is entered kvmclock will be updated, + * so the guest will not see stale values. 
+ */ + smp_call_function_single(freq->cpu, bounce_off, NULL, 1); + } + return 0; +} + +static struct notifier_block kvmclock_cpufreq_notifier_block = { + .notifier_call = kvmclock_cpufreq_notifier +}; + int kvm_arch_init(void *opaque) { - int r; + int r, cpu; struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; if (kvm_x86_ops) { @@ -2673,6 +2749,15 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); + + for_each_possible_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + tsc_khz_ref = tsc_khz; + cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + return 0; out: @@ -3008,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_v if (vcpu->requests) { if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) __kvm_migrate_timers(vcpu); + if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) + kvm_write_guest_time(vcpu); if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) kvm_mmu_sync_roots(vcpu); if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) Index: kvm/include/linux/kvm_host.h =================================================================== --- kvm.orig/include/linux/kvm_host.h +++ kvm/include/linux/kvm_host.h @@ -37,6 +37,7 @@ #define KVM_REQ_PENDING_TIMER 5 #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 +#define KVM_REQ_KVMCLOCK_UPDATE 8 #define KVM_USERSPACE_IRQ_SOURCE_ID 0