Message ID | 20120821112630.3512.5109.stgit@abhimanyu (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Aug 21, 2012 at 04:56:35PM +0530, Nikunj A. Dadhania wrote: > From: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com> > > The patch adds guest code for msr between guest and hypervisor. The > msr will export the vcpu running/pre-empted information to the guest > from host. This will enable guest to intelligently send ipi to running > vcpus and set flag for pre-empted vcpus. This will prevent waiting for > vcpus that are not running. > > Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl> > Signed-off-by: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com> > --- > arch/x86/include/asm/kvm_para.h | 13 +++++++++++++ > arch/x86/kernel/kvm.c | 36 ++++++++++++++++++++++++++++++++++++ > 2 files changed, 49 insertions(+), 0 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h > index 2f7712e..5dfb975 100644 > --- a/arch/x86/include/asm/kvm_para.h > +++ b/arch/x86/include/asm/kvm_para.h > @@ -23,6 +23,7 @@ > #define KVM_FEATURE_ASYNC_PF 4 > #define KVM_FEATURE_STEAL_TIME 5 > #define KVM_FEATURE_PV_EOI 6 > +#define KVM_FEATURE_VCPU_STATE 7 > > /* The last 8 bits are used to indicate how to interpret the flags field > * in pvclock structure. If no bits are set, all flags are ignored. 
> @@ -39,6 +40,7 @@ > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > #define MSR_KVM_STEAL_TIME 0x4b564d03 > #define MSR_KVM_PV_EOI_EN 0x4b564d04 > +#define MSR_KVM_VCPU_STATE 0x4b564d05 > > struct kvm_steal_time { > __u64 steal; > @@ -51,6 +53,17 @@ struct kvm_steal_time { > #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) > #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) > > +struct kvm_vcpu_state { > + __u64 state; > + __u32 pad[14]; > +}; > +/* bits in vcpu_state->state */ > +#define KVM_VCPU_STATE_IN_GUEST_MODE 0 > +#define KVM_VCPU_STATE_SHOULD_FLUSH 1 > + > +#define KVM_VCPU_STATE_ALIGN_BITS 5 > +#define KVM_VCPU_STATE_VALID_BITS ((-1ULL << (KVM_VCPU_STATE_ALIGN_BITS + 1))) > + > #define KVM_MAX_MMU_OP_BATCH 32 > > #define KVM_ASYNC_PF_ENABLED (1 << 0) > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index c1d61ee..37e6599 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -66,6 +66,9 @@ static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); > static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); > static int has_steal_clock = 0; > > +DEFINE_PER_CPU(struct kvm_vcpu_state, vcpu_state) __aligned(64); > +static int has_vcpu_state; > + > /* > * No need for any "IO delay" on KVM > */ > @@ -302,6 +305,22 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val) > apic_write(APIC_EOI, APIC_EOI_ACK); > } > > +static void kvm_register_vcpu_state(void) > +{ > + int cpu = smp_processor_id(); > + struct kvm_vcpu_state *v_state; > + > + if (!has_vcpu_state) > + return; > + > + v_state = &per_cpu(vcpu_state, cpu); > + memset(v_state, 0, sizeof(*v_state)); > + > + wrmsrl(MSR_KVM_VCPU_STATE, (__pa(v_state) | KVM_MSR_ENABLED)); > + printk(KERN_INFO "kvm-vcpustate: cpu %d, msr %lx\n", > + cpu, __pa(v_state)); > +} > + > void __cpuinit kvm_guest_cpu_init(void) > { > if (!kvm_para_available()) > @@ -330,6 +349,9 @@ void __cpuinit kvm_guest_cpu_init(void) > 
> if (has_steal_clock) > kvm_register_steal_time(); > + > + if (has_vcpu_state) > + kvm_register_vcpu_state(); > } > > static void kvm_pv_disable_apf(void) > @@ -393,6 +415,14 @@ void kvm_disable_steal_time(void) > wrmsr(MSR_KVM_STEAL_TIME, 0, 0); > } > > +void kvm_disable_vcpu_state(void) > +{ > + if (!has_vcpu_state) > + return; > + > + wrmsr(MSR_KVM_VCPU_STATE, 0, 0); wrmsrl (to be consistent). > +} > + > #ifdef CONFIG_SMP > static void __init kvm_smp_prepare_boot_cpu(void) > { > @@ -410,6 +440,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) > > static void kvm_guest_cpu_offline(void *dummy) > { > + kvm_disable_vcpu_state(); Should disable MSR at kvm_pv_guest_cpu_reboot. > kvm_disable_steal_time(); > if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) > wrmsrl(MSR_KVM_PV_EOI_EN, 0); > @@ -469,6 +500,11 @@ void __init kvm_guest_init(void) > if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) > apic_set_eoi_write(kvm_guest_apic_eoi_write); > > +#ifdef CONFIG_PARAVIRT_TLB_FLUSH > + if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE)) > + has_vcpu_state = 1; > +#endif Why only this hunk guarded by CONFIG_PARAVIRT_TLB_FLUSH and not the rest of the code? Is there a switch to enable/disable this feature on the kernel command line? Grep for early_param in kvm.c. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 23 Aug 2012 06:36:43 -0300, Marcelo Tosatti <mtosatti@redhat.com> wrote: > On Tue, Aug 21, 2012 at 04:56:35PM +0530, Nikunj A. Dadhania wrote: > > > > +void kvm_disable_vcpu_state(void) > > +{ > > + if (!has_vcpu_state) > > + return; > > + > > + wrmsr(MSR_KVM_VCPU_STATE, 0, 0); > > wrmsrl (to be consistent). > Sure, will change > > +} > > + > > #ifdef CONFIG_SMP > > static void __init kvm_smp_prepare_boot_cpu(void) > > { > > @@ -410,6 +440,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) > > > > static void kvm_guest_cpu_offline(void *dummy) > > { > > + kvm_disable_vcpu_state(); > > Should disable MSR at kvm_pv_guest_cpu_reboot. > Sure, can you explain the difference for my understanding? > > kvm_disable_steal_time(); > > if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) > > wrmsrl(MSR_KVM_PV_EOI_EN, 0); > > @@ -469,6 +500,11 @@ void __init kvm_guest_init(void) > > if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) > > apic_set_eoi_write(kvm_guest_apic_eoi_write); > > > > +#ifdef CONFIG_PARAVIRT_TLB_FLUSH > > + if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE)) > > + has_vcpu_state = 1; > > +#endif > > Why only this hunk guarded by CONFIG_PARAVIRT_TLB_FLUSH and not > the rest of the code? > The guest should have been compiled with CONFIG_PARAVIRT_TLB_FLUSH, as the config also brings HAVE_RCU_TABLE_FREE code into the picture. We should not enable this code without HAVE_RCU_TABLE_FREE. Did not want to spray this across all the code, as the compiler will take care of throwing out the kvm_tlb_flush_others. > Is there a switch to enable/disable this feature on the kernel > command line? > No, haven't added it. > Grep for early_param in kvm.c. > Let me know if that is required. Regards Nikunj -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 24, 2012 at 11:09:39AM +0530, Nikunj A Dadhania wrote: > On Thu, 23 Aug 2012 06:36:43 -0300, Marcelo Tosatti <mtosatti@redhat.com> wrote: > > On Tue, Aug 21, 2012 at 04:56:35PM +0530, Nikunj A. Dadhania wrote: > > > > > > +void kvm_disable_vcpu_state(void) > > > +{ > > > + if (!has_vcpu_state) > > > + return; > > > + > > > + wrmsr(MSR_KVM_VCPU_STATE, 0, 0); > > > > wrmsrl (to be consistent). > > > Sure, will change > > > > +} > > > + > > > #ifdef CONFIG_SMP > > > static void __init kvm_smp_prepare_boot_cpu(void) > > > { > > > @@ -410,6 +440,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) > > > > > > static void kvm_guest_cpu_offline(void *dummy) > > > { > > > + kvm_disable_vcpu_state(); > > > > Should disable MSR at kvm_pv_guest_cpu_reboot. > > > Sure, can you explain the difference for my understanding? These are different callbacks. One is used for CPU offline, the other during reboot. > > > kvm_disable_steal_time(); > > > if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) > > > wrmsrl(MSR_KVM_PV_EOI_EN, 0); > > > @@ -469,6 +500,11 @@ void __init kvm_guest_init(void) > > > if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) > > > apic_set_eoi_write(kvm_guest_apic_eoi_write); > > > > > > +#ifdef CONFIG_PARAVIRT_TLB_FLUSH > > > + if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE)) > > > + has_vcpu_state = 1; > > > +#endif > > > > Why only this hunk guarded by CONFIG_PARAVIRT_TLB_FLUSH and not > > the rest of the code? > > > The guest should have been compiled with CONFIG_PARAVIRT_TLB_FLUSH, as > the config also brings in HAVE_RCU_TABLE_FREE code into picture. We > should not enable this code without HAVE_RCU_TABLE_FREE. > > Did not want to spray this across all the code, as the compiler will > take care of throwing out the kvm_tlb_flush_others > > > Is there a switch to enable/disable this feature on the kernel > > command line? > > > No, havent added it. > > > Grep for early_param in kvm.c. > > > Let me know if that is required. Yes, please add it. 
It's useful. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 24 Aug 2012 12:02:27 -0300, Marcelo Tosatti <mtosatti@redhat.com> wrote: > On Fri, Aug 24, 2012 at 11:09:39AM +0530, Nikunj A Dadhania wrote: > > On Thu, 23 Aug 2012 06:36:43 -0300, Marcelo Tosatti <mtosatti@redhat.com> wrote: > > > On Tue, Aug 21, 2012 at 04:56:35PM +0530, Nikunj A. Dadhania wrote: [...] > > > > @@ -469,6 +500,11 @@ void __init kvm_guest_init(void) > > > > if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) > > > > apic_set_eoi_write(kvm_guest_apic_eoi_write); > > > > > > > > +#ifdef CONFIG_PARAVIRT_TLB_FLUSH > > > > + if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE)) > > > > + has_vcpu_state = 1; > > > > +#endif > > > > > > Why only this hunk guarded by CONFIG_PARAVIRT_TLB_FLUSH and not > > > the rest of the code? > > > > > The guest should have been compiled with CONFIG_PARAVIRT_TLB_FLUSH, as > > the config also brings in HAVE_RCU_TABLE_FREE code into picture. We > > should not enable this code without HAVE_RCU_TABLE_FREE. > > > > Did not want to spray this across all the code, as the compiler will > > take care of throwing out the kvm_tlb_flush_others > > > > > Is there a switch to enable/disable this feature on the kernel > > > command line? > > > > > No, havent added it. > > > > > Grep for early_param in kvm.c. > > > > > Let me know if that is required. > > Yes, please add it. Its useful. > Done, will send it in my next version. Nikunj -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 2f7712e..5dfb975 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -23,6 +23,7 @@ #define KVM_FEATURE_ASYNC_PF 4 #define KVM_FEATURE_STEAL_TIME 5 #define KVM_FEATURE_PV_EOI 6 +#define KVM_FEATURE_VCPU_STATE 7 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -39,6 +40,7 @@ #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define MSR_KVM_VCPU_STATE 0x4b564d05 struct kvm_steal_time { __u64 steal; @@ -51,6 +53,17 @@ struct kvm_steal_time { #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) +struct kvm_vcpu_state { + __u64 state; + __u32 pad[14]; +}; +/* bits in vcpu_state->state */ +#define KVM_VCPU_STATE_IN_GUEST_MODE 0 +#define KVM_VCPU_STATE_SHOULD_FLUSH 1 + +#define KVM_VCPU_STATE_ALIGN_BITS 5 +#define KVM_VCPU_STATE_VALID_BITS ((-1ULL << (KVM_VCPU_STATE_ALIGN_BITS + 1))) + #define KVM_MAX_MMU_OP_BATCH 32 #define KVM_ASYNC_PF_ENABLED (1 << 0) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index c1d61ee..37e6599 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -66,6 +66,9 @@ static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); static int has_steal_clock = 0; +DEFINE_PER_CPU(struct kvm_vcpu_state, vcpu_state) __aligned(64); +static int has_vcpu_state; + /* * No need for any "IO delay" on KVM */ @@ -302,6 +305,22 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val) apic_write(APIC_EOI, APIC_EOI_ACK); } +static void kvm_register_vcpu_state(void) +{ + int cpu = smp_processor_id(); + struct kvm_vcpu_state *v_state; + + if (!has_vcpu_state) + return; + + v_state = 
&per_cpu(vcpu_state, cpu); + memset(v_state, 0, sizeof(*v_state)); + + wrmsrl(MSR_KVM_VCPU_STATE, (__pa(v_state) | KVM_MSR_ENABLED)); + printk(KERN_INFO "kvm-vcpustate: cpu %d, msr %lx\n", + cpu, __pa(v_state)); +} + void __cpuinit kvm_guest_cpu_init(void) { if (!kvm_para_available()) @@ -330,6 +349,9 @@ void __cpuinit kvm_guest_cpu_init(void) if (has_steal_clock) kvm_register_steal_time(); + + if (has_vcpu_state) + kvm_register_vcpu_state(); } static void kvm_pv_disable_apf(void) @@ -393,6 +415,14 @@ void kvm_disable_steal_time(void) wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } +void kvm_disable_vcpu_state(void) +{ + if (!has_vcpu_state) + return; + + wrmsr(MSR_KVM_VCPU_STATE, 0, 0); +} + #ifdef CONFIG_SMP static void __init kvm_smp_prepare_boot_cpu(void) { @@ -410,6 +440,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) static void kvm_guest_cpu_offline(void *dummy) { + kvm_disable_vcpu_state(); kvm_disable_steal_time(); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); @@ -469,6 +500,11 @@ void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) apic_set_eoi_write(kvm_guest_apic_eoi_write); +#ifdef CONFIG_PARAVIRT_TLB_FLUSH + if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE)) + has_vcpu_state = 1; +#endif + #ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; register_cpu_notifier(&kvm_cpu_notifier);