diff mbox

[v4,3/8] KVM Guest: Add VCPU running/pre-empted state for guest

Message ID 20120821112630.3512.5109.stgit@abhimanyu (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania Aug. 21, 2012, 11:26 a.m. UTC
From: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com>

This patch adds the guest-side code for an MSR shared between the guest
and the hypervisor. The MSR exports the vcpu running/pre-empted
information from the host to the guest. This enables the guest to
intelligently send IPIs to running vcpus and set a flag for pre-empted
vcpus, which prevents waiting for vcpus that are not running.

Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com>
---
 arch/x86/include/asm/kvm_para.h |   13 +++++++++++++
 arch/x86/kernel/kvm.c           |   36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 0 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Marcelo Tosatti Aug. 23, 2012, 9:36 a.m. UTC | #1
On Tue, Aug 21, 2012 at 04:56:35PM +0530, Nikunj A. Dadhania wrote:
> From: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com>
> 
> The patch adds guest code for msr between guest and hypervisor. The
> msr will export the vcpu running/pre-empted information to the guest
> from host. This will enable guest to intelligently send ipi to running
> vcpus and set flag for pre-empted vcpus. This will prevent waiting for
> vcpus that are not running.
> 
> Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Signed-off-by: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  arch/x86/include/asm/kvm_para.h |   13 +++++++++++++
>  arch/x86/kernel/kvm.c           |   36 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 49 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index 2f7712e..5dfb975 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -23,6 +23,7 @@
>  #define KVM_FEATURE_ASYNC_PF		4
>  #define KVM_FEATURE_STEAL_TIME		5
>  #define KVM_FEATURE_PV_EOI		6
> +#define KVM_FEATURE_VCPU_STATE          7
>  
>  /* The last 8 bits are used to indicate how to interpret the flags field
>   * in pvclock structure. If no bits are set, all flags are ignored.
> @@ -39,6 +40,7 @@
>  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
>  #define MSR_KVM_STEAL_TIME  0x4b564d03
>  #define MSR_KVM_PV_EOI_EN      0x4b564d04
> +#define MSR_KVM_VCPU_STATE  0x4b564d05
>  
>  struct kvm_steal_time {
>  	__u64 steal;
> @@ -51,6 +53,17 @@ struct kvm_steal_time {
>  #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
>  #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
>  
> +struct kvm_vcpu_state {
> +	__u64 state;
> +	__u32 pad[14];
> +};
> +/* bits in vcpu_state->state */
> +#define KVM_VCPU_STATE_IN_GUEST_MODE 0
> +#define KVM_VCPU_STATE_SHOULD_FLUSH  1
> +
> +#define KVM_VCPU_STATE_ALIGN_BITS 5
> +#define KVM_VCPU_STATE_VALID_BITS ((-1ULL << (KVM_VCPU_STATE_ALIGN_BITS + 1)))
> +
>  #define KVM_MAX_MMU_OP_BATCH           32
>  
>  #define KVM_ASYNC_PF_ENABLED			(1 << 0)
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index c1d61ee..37e6599 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -66,6 +66,9 @@ static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
>  static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
>  static int has_steal_clock = 0;
>  
> +DEFINE_PER_CPU(struct kvm_vcpu_state, vcpu_state) __aligned(64);
> +static int has_vcpu_state;
> +
>  /*
>   * No need for any "IO delay" on KVM
>   */
> @@ -302,6 +305,22 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
>  	apic_write(APIC_EOI, APIC_EOI_ACK);
>  }
>  
> +static void kvm_register_vcpu_state(void)
> +{
> +	int cpu = smp_processor_id();
> +	struct kvm_vcpu_state *v_state;
> +
> +	if (!has_vcpu_state)
> +		return;
> +
> +	v_state = &per_cpu(vcpu_state, cpu);
> +	memset(v_state, 0, sizeof(*v_state));
> +
> +	wrmsrl(MSR_KVM_VCPU_STATE, (__pa(v_state) | KVM_MSR_ENABLED));
> +	printk(KERN_INFO "kvm-vcpustate: cpu %d, msr %lx\n",
> +		cpu, __pa(v_state));
> +}
> +
>  void __cpuinit kvm_guest_cpu_init(void)
>  {
>  	if (!kvm_para_available())
> @@ -330,6 +349,9 @@ void __cpuinit kvm_guest_cpu_init(void)
>  
>  	if (has_steal_clock)
>  		kvm_register_steal_time();
> +
> +	if (has_vcpu_state)
> +		kvm_register_vcpu_state();
>  }
>  
>  static void kvm_pv_disable_apf(void)
> @@ -393,6 +415,14 @@ void kvm_disable_steal_time(void)
>  	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
>  }
>  
> +void kvm_disable_vcpu_state(void)
> +{
> +	if (!has_vcpu_state)
> +		return;
> +
> +	wrmsr(MSR_KVM_VCPU_STATE, 0, 0);

wrmsrl (to be consistent).

> +}
> +
>  #ifdef CONFIG_SMP
>  static void __init kvm_smp_prepare_boot_cpu(void)
>  {
> @@ -410,6 +440,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
>  
>  static void kvm_guest_cpu_offline(void *dummy)
>  {
> +	kvm_disable_vcpu_state();

This should also disable the MSR in kvm_pv_guest_cpu_reboot.

>  	kvm_disable_steal_time();
>  	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
>  		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
> @@ -469,6 +500,11 @@ void __init kvm_guest_init(void)
>  	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
>  		apic_set_eoi_write(kvm_guest_apic_eoi_write);
>  
> +#ifdef CONFIG_PARAVIRT_TLB_FLUSH
> +	if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE))
> +		has_vcpu_state = 1;
> +#endif

Why is only this hunk guarded by CONFIG_PARAVIRT_TLB_FLUSH and not
the rest of the code?

Is there a switch to enable/disable this feature on the kernel
command line? Grep for early_param in kvm.c.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nikunj A. Dadhania Aug. 24, 2012, 5:39 a.m. UTC | #2
On Thu, 23 Aug 2012 06:36:43 -0300, Marcelo Tosatti <mtosatti@redhat.com> wrote:
> On Tue, Aug 21, 2012 at 04:56:35PM +0530, Nikunj A. Dadhania wrote:
> >  
> > +void kvm_disable_vcpu_state(void)
> > +{
> > +	if (!has_vcpu_state)
> > +		return;
> > +
> > +	wrmsr(MSR_KVM_VCPU_STATE, 0, 0);
> 
> wrmsrl (to be consistent).
>
Sure, will change
 
> > +}
> > +
> >  #ifdef CONFIG_SMP
> >  static void __init kvm_smp_prepare_boot_cpu(void)
> >  {
> > @@ -410,6 +440,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
> >  
> >  static void kvm_guest_cpu_offline(void *dummy)
> >  {
> > +	kvm_disable_vcpu_state();
> 
> Should disable MSR at kvm_pv_guest_cpu_reboot.
> 
Sure, can you explain the difference for my understanding?

> >  	kvm_disable_steal_time();
> >  	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
> >  		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
> > @@ -469,6 +500,11 @@ void __init kvm_guest_init(void)
> >  	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
> >  		apic_set_eoi_write(kvm_guest_apic_eoi_write);
> >  
> > +#ifdef CONFIG_PARAVIRT_TLB_FLUSH
> > +	if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE))
> > +		has_vcpu_state = 1;
> > +#endif
> 
> Why only this hunk guarded by CONFIG_PARAVIRT_TLB_FLUSH and not
> the rest of the code?
> 
The guest should have been compiled with CONFIG_PARAVIRT_TLB_FLUSH, as
that config also brings the HAVE_RCU_TABLE_FREE code into the picture.
We should not enable this code without HAVE_RCU_TABLE_FREE.

I did not want to spray this #ifdef across all the code, as the compiler
will take care of throwing out kvm_tlb_flush_others.

> Is there a switch to enable/disable this feature on the kernel
> command line? 
>
No, I haven't added it.

> Grep for early_param in kvm.c.
> 
Let me know if that is required.

Regards
Nikunj

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Aug. 24, 2012, 3:02 p.m. UTC | #3
On Fri, Aug 24, 2012 at 11:09:39AM +0530, Nikunj A Dadhania wrote:
> On Thu, 23 Aug 2012 06:36:43 -0300, Marcelo Tosatti <mtosatti@redhat.com> wrote:
> > On Tue, Aug 21, 2012 at 04:56:35PM +0530, Nikunj A. Dadhania wrote:
> > >  
> > > +void kvm_disable_vcpu_state(void)
> > > +{
> > > +	if (!has_vcpu_state)
> > > +		return;
> > > +
> > > +	wrmsr(MSR_KVM_VCPU_STATE, 0, 0);
> > 
> > wrmsrl (to be consistent).
> >
> Sure, will change
>  
> > > +}
> > > +
> > >  #ifdef CONFIG_SMP
> > >  static void __init kvm_smp_prepare_boot_cpu(void)
> > >  {
> > > @@ -410,6 +440,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
> > >  
> > >  static void kvm_guest_cpu_offline(void *dummy)
> > >  {
> > > +	kvm_disable_vcpu_state();
> > 
> > Should disable MSR at kvm_pv_guest_cpu_reboot.
> > 
> Sure, can you explain the difference for my understanding?

These are different callbacks. One is used for CPU offline, the
other during reboot.

> > >  	kvm_disable_steal_time();
> > >  	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
> > >  		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
> > > @@ -469,6 +500,11 @@ void __init kvm_guest_init(void)
> > >  	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
> > >  		apic_set_eoi_write(kvm_guest_apic_eoi_write);
> > >  
> > > +#ifdef CONFIG_PARAVIRT_TLB_FLUSH
> > > +	if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE))
> > > +		has_vcpu_state = 1;
> > > +#endif
> > 
> > Why only this hunk guarded by CONFIG_PARAVIRT_TLB_FLUSH and not
> > the rest of the code?
> > 
> The guest should have been compiled with CONFIG_PARAVIRT_TLB_FLUSH, as
> the config also brings in HAVE_RCU_TABLE_FREE code into picture. We
> should not enable this code without HAVE_RCU_TABLE_FREE.
> 
> Did not want to spray this across all the code, as the compiler will
> take care of throwing out the kvm_tlb_flush_others
> 
> > Is there a switch to enable/disable this feature on the kernel
> > command line? 
> >
> No, havent added it. 
> 
> > Grep for early_param in kvm.c.
> > 
> Let me know if that is required.

Yes, please add it. It's useful.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nikunj A. Dadhania Aug. 27, 2012, 4:24 a.m. UTC | #4
On Fri, 24 Aug 2012 12:02:27 -0300, Marcelo Tosatti <mtosatti@redhat.com> wrote:
> On Fri, Aug 24, 2012 at 11:09:39AM +0530, Nikunj A Dadhania wrote:
> > On Thu, 23 Aug 2012 06:36:43 -0300, Marcelo Tosatti <mtosatti@redhat.com> wrote:
> > > On Tue, Aug 21, 2012 at 04:56:35PM +0530, Nikunj A. Dadhania wrote:
[...]

> > > > @@ -469,6 +500,11 @@ void __init kvm_guest_init(void)
> > > >  	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
> > > >  		apic_set_eoi_write(kvm_guest_apic_eoi_write);
> > > >  
> > > > +#ifdef CONFIG_PARAVIRT_TLB_FLUSH
> > > > +	if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE))
> > > > +		has_vcpu_state = 1;
> > > > +#endif
> > > 
> > > Why only this hunk guarded by CONFIG_PARAVIRT_TLB_FLUSH and not
> > > the rest of the code?
> > > 
> > The guest should have been compiled with CONFIG_PARAVIRT_TLB_FLUSH, as
> > the config also brings in HAVE_RCU_TABLE_FREE code into picture. We
> > should not enable this code without HAVE_RCU_TABLE_FREE.
> > 
> > Did not want to spray this across all the code, as the compiler will
> > take care of throwing out the kvm_tlb_flush_others
> > 
> > > Is there a switch to enable/disable this feature on the kernel
> > > command line? 
> > >
> > No, havent added it. 
> > 
> > > Grep for early_param in kvm.c.
> > > 
> > Let me know if that is required.
> 
> Yes, please add it. Its useful.
> 
Done, will send it in my next version.

Nikunj

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 2f7712e..5dfb975 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -23,6 +23,7 @@ 
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
 #define KVM_FEATURE_PV_EOI		6
+#define KVM_FEATURE_VCPU_STATE          7
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -39,6 +40,7 @@ 
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 #define MSR_KVM_STEAL_TIME  0x4b564d03
 #define MSR_KVM_PV_EOI_EN      0x4b564d04
+#define MSR_KVM_VCPU_STATE  0x4b564d05
 
 struct kvm_steal_time {
 	__u64 steal;
@@ -51,6 +53,17 @@  struct kvm_steal_time {
 #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
 #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
 
+struct kvm_vcpu_state {
+	__u64 state;
+	__u32 pad[14];
+};
+/* bits in vcpu_state->state */
+#define KVM_VCPU_STATE_IN_GUEST_MODE 0
+#define KVM_VCPU_STATE_SHOULD_FLUSH  1
+
+#define KVM_VCPU_STATE_ALIGN_BITS 5
+#define KVM_VCPU_STATE_VALID_BITS ((-1ULL << (KVM_VCPU_STATE_ALIGN_BITS + 1)))
+
 #define KVM_MAX_MMU_OP_BATCH           32
 
 #define KVM_ASYNC_PF_ENABLED			(1 << 0)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c1d61ee..37e6599 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -66,6 +66,9 @@  static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
 static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
 static int has_steal_clock = 0;
 
+DEFINE_PER_CPU(struct kvm_vcpu_state, vcpu_state) __aligned(64);
+static int has_vcpu_state;
+
 /*
  * No need for any "IO delay" on KVM
  */
@@ -302,6 +305,22 @@  static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
 	apic_write(APIC_EOI, APIC_EOI_ACK);
 }
 
+static void kvm_register_vcpu_state(void)
+{
+	int cpu = smp_processor_id();
+	struct kvm_vcpu_state *v_state;
+
+	if (!has_vcpu_state)
+		return;
+
+	v_state = &per_cpu(vcpu_state, cpu);
+	memset(v_state, 0, sizeof(*v_state));
+
+	wrmsrl(MSR_KVM_VCPU_STATE, (__pa(v_state) | KVM_MSR_ENABLED));
+	printk(KERN_INFO "kvm-vcpustate: cpu %d, msr %lx\n",
+		cpu, __pa(v_state));
+}
+
 void __cpuinit kvm_guest_cpu_init(void)
 {
 	if (!kvm_para_available())
@@ -330,6 +349,9 @@  void __cpuinit kvm_guest_cpu_init(void)
 
 	if (has_steal_clock)
 		kvm_register_steal_time();
+
+	if (has_vcpu_state)
+		kvm_register_vcpu_state();
 }
 
 static void kvm_pv_disable_apf(void)
@@ -393,6 +415,14 @@  void kvm_disable_steal_time(void)
 	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
 }
 
+void kvm_disable_vcpu_state(void)
+{
+	if (!has_vcpu_state)
+		return;
+
+	wrmsr(MSR_KVM_VCPU_STATE, 0, 0);
+}
+
 #ifdef CONFIG_SMP
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
@@ -410,6 +440,7 @@  static void __cpuinit kvm_guest_cpu_online(void *dummy)
 
 static void kvm_guest_cpu_offline(void *dummy)
 {
+	kvm_disable_vcpu_state();
 	kvm_disable_steal_time();
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
@@ -469,6 +500,11 @@  void __init kvm_guest_init(void)
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		apic_set_eoi_write(kvm_guest_apic_eoi_write);
 
+#ifdef CONFIG_PARAVIRT_TLB_FLUSH
+	if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE))
+		has_vcpu_state = 1;
+#endif
+
 #ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 	register_cpu_notifier(&kvm_cpu_notifier);