Message ID | 1582684862-10880-1-git-send-email-wanpengli@tencent.com (mailing list archive)
---|---
State | New, archived
Series | [v3] KVM: LAPIC: Recalculate apic map in batch
On 26/02/20 03:41, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
>
> In the vCPU reset and set APIC_BASE MSR path, the apic map is recalculated
> several times; each recalculation takes 10+ us (observed by ftrace in my
> non-overcommit environment) because of the expensive memory allocation, mutex
> and RCU operations. This patch optimizes it by recalculating the apic map in
> batch; I hope this can benefit serverless scenarios, which frequently
> create/destroy VMs.
>
> Before patch:
>
>   kvm_lapic_reset ~27us
>
> After patch:
>
>   kvm_lapic_reset ~14us
>
> Observed by ftrace, an improvement of ~48%.
>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
> v2 -> v3:
>  * move apic_map_dirty to kvm_arch
>  * add the suggestions from Paolo
>
> v1 -> v2:
>  * add apic_map_dirty to kvm_lapic
>  * on the error condition in kvm_apic_set_state, recalculate unconditionally
>
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/lapic.c            | 46 ++++++++++++++++++++++++++++++++---------
>  arch/x86/kvm/lapic.h            |  1 +
>  arch/x86/kvm/x86.c              |  1 +
>  4 files changed, 39 insertions(+), 10 deletions(-)
>
> [...]

Queued, thanks.

Paolo
On Wed, 26 Feb 2020 10:41:02 +0800 Wanpeng Li <kernellwp@gmail.com> wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
>
> In the vCPU reset and set APIC_BASE MSR path, the apic map is recalculated
> several times; [...]
>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>

This broke CPU hotplug:

  qemu-kvm -m 2G -smp 4,maxcpus=8 -monitor stdio
  (qemu) device_add qemu64-x86_64-cpu,socket-id=4,core-id=0,thread-id=0

In the guest this fails with:

  smpboot: do_boot_cpu failed(-1) to wakeup CPU#4

which makes me suspect that INIT/SIPI wasn't delivered.

Is it a known issue?
On 19/06/20 14:36, Igor Mammedov wrote:
>   qemu-kvm -m 2G -smp 4,maxcpus=8 -monitor stdio
>   (qemu) device_add qemu64-x86_64-cpu,socket-id=4,core-id=0,thread-id=0
>
> In the guest this fails with:
>
>   smpboot: do_boot_cpu failed(-1) to wakeup CPU#4
>
> which makes me suspect that INIT/SIPI wasn't delivered.
>
> Is it a known issue?

No, it isn't.  I'll revert.

Paolo
On Fri, 19 Jun 2020 16:10:43 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 19/06/20 14:36, Igor Mammedov wrote:
> >   qemu-kvm -m 2G -smp 4,maxcpus=8 -monitor stdio
> >   (qemu) device_add qemu64-x86_64-cpu,socket-id=4,core-id=0,thread-id=0
> >
> > [...]
> >
> > Is it a known issue?
>
> No, it isn't.  I'll revert.

Wait for a day or two; I'll try to come up with a fix over the weekend.

> Paolo
On Fri, 19 Jun 2020 16:10:43 +0200 Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 19/06/20 14:36, Igor Mammedov wrote:
> > [...]
> >
> > which makes me suspect that INIT/SIPI wasn't delivered.
> >
> > Is it a known issue?
>
> No, it isn't.  I'll revert.
>
> Paolo

The following fixes the immediate issue:

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 34a7e0533dad..6dc177da19da 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2567,6 +2567,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 	}
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
 
+	apic->vcpu->kvm->arch.apic_map_dirty = true;
 	kvm_recalculate_apic_map(vcpu->kvm);
 	kvm_apic_set_version(vcpu);

The problem is that during kvm_arch_vcpu_create() the new vcpu is not visible to
kvm_recalculate_apic_map(), so however many times the map update was called
there, it did not affect the apic map.

What broke hotplug is that kvm_vcpu_ioctl_set_lapic -> kvm_apic_set_state, which
is called after the new vcpu becomes visible, used to do an unconditional update
that pulled in the new vcpu; with this patch that update is gone, since the state
hasn't actually changed, so we lost the one call of kvm_recalculate_apic_map()
that did matter.

It happens to work for vcpus present at boot only by luck (the BSP updates SPIV
after all vcpus have been created, which triggers kvm_recalculate_apic_map()).

I'm not sending a formal patch yet, since I have doubts about the subject patch.
The following sequence looks like a race that can cause lost map update events:

  cpu1                                cpu2

  apic_map_dirty = true
  ------------------------------------------------------------
                                      kvm_recalculate_apic_map:
                                           passes the check,
                                           mutex_lock(&kvm->arch.apic_map_lock);
                                           if (!kvm->arch.apic_map_dirty)
                                           and is in the middle of updating the map
  -------------------------------------------------------------
  other calls to
  apic_map_dirty = true   might be too late for the affected cpu
  -------------------------------------------------------------
                                      apic_map_dirty = false
  -------------------------------------------------------------
  kvm_recalculate_apic_map:
      bails out on
      if (!kvm->arch.apic_map_dirty)

It's safer to revert this patch for now, as you suggested earlier. If you prefer
to keep it, I'll post the above fixup as a patch.
On 22/06/20 00:26, Igor Mammedov wrote:
> The following sequence looks like a race that can cause lost map update events:
>
>   cpu1                                cpu2
>
>   apic_map_dirty = true
>   ------------------------------------------------------------
>                                       kvm_recalculate_apic_map:
>                                            passes the check,
>                                            mutex_lock(&kvm->arch.apic_map_lock);
>                                            if (!kvm->arch.apic_map_dirty)
>                                            and is in the middle of updating the map
>   -------------------------------------------------------------
>   other calls to
>   apic_map_dirty = true   might be too late for the affected cpu
>   -------------------------------------------------------------
>                                       apic_map_dirty = false
>   -------------------------------------------------------------
>   kvm_recalculate_apic_map:
>       bails out on
>       if (!kvm->arch.apic_map_dirty)

I will post a fix for that.  Thanks for the analysis!

Paolo
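For illustration, one way to close the window described above is to turn
apic_map_dirty into a tri-state atomic and claim the pending update with a
compare-and-exchange under the map lock, so a store that re-dirties the map
during an in-flight rebuild forces another pass instead of being lost. A
minimal sketch of that idea (the enum values, the atomic_cmpxchg() flow and
the kvm_apic_map_mark_dirty() helper are illustrative assumptions, not the
fix that eventually landed):

/*
 * Illustrative sketch only: apic_map_dirty is assumed to be an atomic_t
 * here (the patch below uses a bool), so that a writer which re-dirties
 * the map while another CPU is rebuilding it cannot be lost.
 */
enum { APIC_MAP_CLEAN, APIC_MAP_UPDATE_IN_PROGRESS, APIC_MAP_DIRTY };

/* Writers mark the map dirty instead of rebuilding it on the spot. */
static inline void kvm_apic_map_mark_dirty(struct kvm *kvm)
{
	atomic_set(&kvm->arch.apic_map_dirty, APIC_MAP_DIRTY);
}

void kvm_recalculate_apic_map(struct kvm *kvm)
{
	/* Fast path: nothing dirtied the map since the last rebuild. */
	if (atomic_read(&kvm->arch.apic_map_dirty) == APIC_MAP_CLEAN)
		return;

	mutex_lock(&kvm->arch.apic_map_lock);

	/*
	 * Claim the pending update.  A writer that sets APIC_MAP_DIRTY after
	 * this point makes the *next* caller rebuild the map again, so the
	 * event is deferred rather than lost.
	 */
	if (atomic_cmpxchg(&kvm->arch.apic_map_dirty,
			   APIC_MAP_DIRTY, APIC_MAP_UPDATE_IN_PROGRESS) ==
	    APIC_MAP_CLEAN)
		goto out;	/* someone else already rebuilt the map */

	/* ... allocate the new map and rcu_assign_pointer() it, as in the patch ... */

	/* Mark the map clean only if nobody re-dirtied it during the rebuild. */
	atomic_cmpxchg(&kvm->arch.apic_map_dirty,
		       APIC_MAP_UPDATE_IN_PROGRESS, APIC_MAP_CLEAN);
out:
	mutex_unlock(&kvm->arch.apic_map_lock);
}

With this shape, the race above resolves itself: if cpu1 re-dirties the map
while cpu2 is rebuilding it, cpu2's final cmpxchg fails and the flag stays
APIC_MAP_DIRTY, so the next kvm_recalculate_apic_map() call rebuilds the map.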
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 40a0c0f..4380ed1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -920,6 +920,7 @@ struct kvm_arch {
 	atomic_t vapics_in_nmi_mode;
 	struct mutex apic_map_lock;
 	struct kvm_apic_map *apic_map;
+	bool apic_map_dirty;
 
 	bool apic_access_page_done;
 	unsigned long apicv_inhibit_reasons;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afcd30d..de832aa 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -164,14 +164,28 @@ static void kvm_apic_map_free(struct rcu_head *rcu)
 	kvfree(map);
 }
 
-static void recalculate_apic_map(struct kvm *kvm)
+void kvm_recalculate_apic_map(struct kvm *kvm)
 {
 	struct kvm_apic_map *new, *old = NULL;
 	struct kvm_vcpu *vcpu;
 	int i;
 	u32 max_id = 255; /* enough space for any xAPIC ID */
 
+	if (!kvm->arch.apic_map_dirty) {
+		/*
+		 * Read kvm->arch.apic_map_dirty before
+		 * kvm->arch.apic_map
+		 */
+		smp_rmb();
+		return;
+	}
+
 	mutex_lock(&kvm->arch.apic_map_lock);
+	if (!kvm->arch.apic_map_dirty) {
+		/* Someone else has updated the map. */
+		mutex_unlock(&kvm->arch.apic_map_lock);
+		return;
+	}
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		if (kvm_apic_present(vcpu))
@@ -236,6 +250,12 @@ static void recalculate_apic_map(struct kvm *kvm)
 	old = rcu_dereference_protected(kvm->arch.apic_map,
 			lockdep_is_held(&kvm->arch.apic_map_lock));
 	rcu_assign_pointer(kvm->arch.apic_map, new);
+	/*
+	 * Write kvm->arch.apic_map before
+	 * clearing apic->apic_map_dirty
+	 */
+	smp_wmb();
+	kvm->arch.apic_map_dirty = false;
 	mutex_unlock(&kvm->arch.apic_map_lock);
 
 	if (old)
@@ -257,20 +277,20 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 		else
 			static_key_slow_inc(&apic_sw_disabled.key);
 
-		recalculate_apic_map(apic->vcpu->kvm);
+		apic->vcpu->kvm->arch.apic_map_dirty = true;
 	}
 }
 
 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 {
 	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
-	recalculate_apic_map(apic->vcpu->kvm);
+	apic->vcpu->kvm->arch.apic_map_dirty = true;
 }
 
 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 {
 	kvm_lapic_set_reg(apic, APIC_LDR, id);
-	recalculate_apic_map(apic->vcpu->kvm);
+	apic->vcpu->kvm->arch.apic_map_dirty = true;
 }
 
 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
@@ -286,7 +306,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 
 	kvm_lapic_set_reg(apic, APIC_ID, id);
 	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
-	recalculate_apic_map(apic->vcpu->kvm);
+	apic->vcpu->kvm->arch.apic_map_dirty = true;
 }
 
 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
@@ -1912,7 +1932,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	case APIC_DFR:
 		if (!apic_x2apic_mode(apic)) {
 			kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
-			recalculate_apic_map(apic->vcpu->kvm);
+			apic->vcpu->kvm->arch.apic_map_dirty = true;
 		} else
 			ret = 1;
 		break;
@@ -2018,6 +2038,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		break;
 	}
 
+	kvm_recalculate_apic_map(apic->vcpu->kvm);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
@@ -2166,7 +2188,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 			static_key_slow_dec_deferred(&apic_hw_disabled);
 		} else {
 			static_key_slow_inc(&apic_hw_disabled.key);
-			recalculate_apic_map(vcpu->kvm);
+			vcpu->kvm->arch.apic_map_dirty = true;
 		}
 	}
 
@@ -2207,6 +2229,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	if (!apic)
 		return;
 
+	vcpu->kvm->arch.apic_map_dirty = false;
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
@@ -2258,6 +2281,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	vcpu->arch.apic_arb_prio = 0;
 	vcpu->arch.apic_attention = 0;
+
+	kvm_recalculate_apic_map(vcpu->kvm);
 }
 
 /*
@@ -2479,17 +2504,18 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	int r;
 
-
 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
 	/* set SPIV separately to get count of SW disabled APICs right */
 	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
 
 	r = kvm_apic_state_fixup(vcpu, s, true);
-	if (r)
+	if (r) {
+		kvm_recalculate_apic_map(vcpu->kvm);
 		return r;
+	}
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
 
-	recalculate_apic_map(vcpu->kvm);
+	kvm_recalculate_apic_map(vcpu->kvm);
 	kvm_apic_set_version(vcpu);
 
 	apic_update_ppr(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ec6fbfe..7581bc2 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -78,6 +78,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
+void kvm_recalculate_apic_map(struct kvm *kvm);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val);
 int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 79bc995..d3802a2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -350,6 +350,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	}
 
 	kvm_lapic_set_base(vcpu, msr_info->data);
+	kvm_recalculate_apic_map(vcpu->kvm);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);