diff mbox

[7/7] KVM-GST: KVM Steal time registration

Message ID 1308007897-17013-8-git-send-email-glommer@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Glauber Costa June 13, 2011, 11:31 p.m. UTC
Register steal time within KVM. Every time we sample the steal time
information, we update a local variable that records the last value
read. We then account the difference.

Signed-off-by: Glauber Costa <glommer@redhat.com>
CC: Rik van Riel <riel@redhat.com>
CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Avi Kivity <avi@redhat.com>
CC: Anthony Liguori <aliguori@us.ibm.com>
CC: Eric B Munson <emunson@mgebm.net>
---
 Documentation/kernel-parameters.txt |    4 ++
 arch/x86/include/asm/kvm_para.h     |    1 +
 arch/x86/kernel/kvm.c               |   72 +++++++++++++++++++++++++++++++++++
 arch/x86/kernel/kvmclock.c          |    2 +
 4 files changed, 79 insertions(+), 0 deletions(-)

Comments

Eric B Munson June 14, 2011, 1:21 a.m. UTC | #1
On Mon, 13 Jun 2011, Glauber Costa wrote:

> Register steal time within KVM. Everytime we sample the steal time
> information, we update a local variable that tells what was the
> last time read. We then account the difference.
> 
> Signed-off-by: Glauber Costa <glommer@redhat.com>
> CC: Rik van Riel <riel@redhat.com>
> CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> CC: Peter Zijlstra <peterz@infradead.org>
> CC: Avi Kivity <avi@redhat.com>
> CC: Anthony Liguori <aliguori@us.ibm.com>
> CC: Eric B Munson <emunson@mgebm.net>

Tested-by: Eric B Munson <emunson@mgebm.net>
Gleb Natapov June 14, 2011, 8:06 a.m. UTC | #2
On Mon, Jun 13, 2011 at 07:31:37PM -0400, Glauber Costa wrote:
> Register steal time within KVM. Everytime we sample the steal time
> information, we update a local variable that tells what was the
> last time read. We then account the difference.
> 
> Signed-off-by: Glauber Costa <glommer@redhat.com>
> CC: Rik van Riel <riel@redhat.com>
> CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> CC: Peter Zijlstra <peterz@infradead.org>
> CC: Avi Kivity <avi@redhat.com>
> CC: Anthony Liguori <aliguori@us.ibm.com>
> CC: Eric B Munson <emunson@mgebm.net>
> ---
>  Documentation/kernel-parameters.txt |    4 ++
>  arch/x86/include/asm/kvm_para.h     |    1 +
>  arch/x86/kernel/kvm.c               |   72 +++++++++++++++++++++++++++++++++++
>  arch/x86/kernel/kvmclock.c          |    2 +
>  4 files changed, 79 insertions(+), 0 deletions(-)
> 
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index fd248a31..a722574 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -1737,6 +1737,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>  	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
>  			fault handling.
>  
> +	no-steal-acc    [X86,KVM] Disable paravirtualized steal time accounting.
> +			steal time is computed, but won't influence scheduler
> +			behaviour
> +
>  	nolapic		[X86-32,APIC] Do not enable or use the local APIC.
>  
>  	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index 0341e61..2a8f2a5 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -94,6 +94,7 @@ struct kvm_vcpu_pv_apf_data {
>  
>  extern void kvmclock_init(void);
>  extern int kvm_register_clock(char *txt);
> +extern void kvm_disable_steal_time(void);
>  
>  
>  /* This instruction is vmcall.  On non-VT architectures, it will generate a
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 33c07b0..5a5ac19 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -51,6 +51,15 @@ static int parse_no_kvmapf(char *arg)
>  
>  early_param("no-kvmapf", parse_no_kvmapf);
>  
> +static int steal_acc = 1;
> +static int parse_no_stealacc(char *arg)
> +{
> +        steal_acc = 0;
> +        return 0;
> +}
> +
> +early_param("no-steal-acc", parse_no_stealacc);
> +
>  struct kvm_para_state {
>  	u8 mmu_queue[MMU_QUEUE_SIZE];
>  	int mmu_queue_len;
> @@ -58,6 +67,8 @@ struct kvm_para_state {
>  
>  static DEFINE_PER_CPU(struct kvm_para_state, para_state);
>  static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
> +static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
> +static int has_steal_clock = 0;
>  
>  static struct kvm_para_state *kvm_para_state(void)
>  {
> @@ -483,23 +494,66 @@ static struct notifier_block kvm_pv_reboot_nb = {
>  	.notifier_call = kvm_pv_reboot_notify,
>  };
>  
> +static void kvm_register_steal_time(void)
> +{
> +	int cpu = smp_processor_id();
> +	struct kvm_steal_time *st = &per_cpu(steal_time, cpu);
> +
> +	if (!has_steal_clock)
> +		return;
> +
> +	memset(st, 0, sizeof(*st));
> +
> +	wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED));
> +	printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n",
> +		cpu, __pa(st));
> +}
> +
> +static u64 kvm_steal_clock(int cpu)
> +{
> +	u64 steal;
> +	struct kvm_steal_time *src;
> +	int version;
> +
> +	src = &per_cpu(steal_time, cpu);
> +	do {
> +		version = src->version;
> +		rmb();
> +		steal = src->steal;
> +		rmb();
> +	} while ((version & 1) || (version != src->version));
> +
> +	return steal;
> +}
> +
>  #ifdef CONFIG_SMP
>  static void __init kvm_smp_prepare_boot_cpu(void)
>  {
>  #ifdef CONFIG_KVM_CLOCK
>  	WARN_ON(kvm_register_clock("primary cpu clock"));
>  #endif
> +	kvm_register_steal_time();
>  	kvm_guest_cpu_init();
>  	native_smp_prepare_boot_cpu();
>  }
>  
>  static void __cpuinit kvm_guest_cpu_online(void *dummy)
>  {
> +	kvm_register_steal_time();
>  	kvm_guest_cpu_init();
>  }
>  
Why not call kvm_register_steal_time() from kvm_guest_cpu_init()?
That way you save one line of code, and steal time will be initialized
in !CONFIG_SMP kernels too.

> +void kvm_disable_steal_time(void)
> +{
> +	if (!has_steal_clock)
> +		return;
> +
> +	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
> +}
> +
>  static void kvm_guest_cpu_offline(void *dummy)
>  {
> +	kvm_disable_steal_time();
>  	kvm_pv_disable_apf(NULL);
>  	apf_task_wake_all();
>  }
> @@ -548,6 +602,11 @@ void __init kvm_guest_init(void)
>  	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
>  		x86_init.irqs.trap_init = kvm_apf_trap_init;
>  
> +	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
> +		has_steal_clock = 1;
> +		pv_time_ops.steal_clock = kvm_steal_clock;
> +	}
> +
>  #ifdef CONFIG_SMP
>  	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
>  	register_cpu_notifier(&kvm_cpu_notifier);
> @@ -555,3 +614,16 @@ void __init kvm_guest_init(void)
>  	kvm_guest_cpu_init();
>  #endif
>  }
> +
> +static __init int activate_jump_labels(void)
> +{
> +	if (has_steal_clock) {
> +		jump_label_inc(&paravirt_steal_enabled);
> +		if (steal_acc)
> +			jump_label_inc(&paravirt_steal_rq_enabled);
> +	}
> +
> +	return 0;
> +}
> +arch_initcall(activate_jump_labels);
> +
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index 6389a6b..c1a0188 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -160,6 +160,7 @@ static void __cpuinit kvm_setup_secondary_clock(void)
>  static void kvm_crash_shutdown(struct pt_regs *regs)
>  {
>  	native_write_msr(msr_kvm_system_time, 0, 0);
> +	kvm_disable_steal_time();
>  	native_machine_crash_shutdown(regs);
>  }
>  #endif
> @@ -167,6 +168,7 @@ static void kvm_crash_shutdown(struct pt_regs *regs)
>  static void kvm_shutdown(void)
>  {
>  	native_write_msr(msr_kvm_system_time, 0, 0);
> +	kvm_disable_steal_time();
>  	native_machine_shutdown();
>  }
>  
> -- 
> 1.7.3.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fd248a31..a722574 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1737,6 +1737,10 @@  bytes respectively. Such letter suffixes can also be entirely omitted.
 	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
 			fault handling.
 
+	no-steal-acc    [X86,KVM] Disable paravirtualized steal time accounting.
+			steal time is computed, but won't influence scheduler
+			behaviour
+
 	nolapic		[X86-32,APIC] Do not enable or use the local APIC.
 
 	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 0341e61..2a8f2a5 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -94,6 +94,7 @@  struct kvm_vcpu_pv_apf_data {
 
 extern void kvmclock_init(void);
 extern int kvm_register_clock(char *txt);
+extern void kvm_disable_steal_time(void);
 
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 33c07b0..5a5ac19 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -51,6 +51,15 @@  static int parse_no_kvmapf(char *arg)
 
 early_param("no-kvmapf", parse_no_kvmapf);
 
+static int steal_acc = 1;
+static int parse_no_stealacc(char *arg)
+{
+        steal_acc = 0;
+        return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
 struct kvm_para_state {
 	u8 mmu_queue[MMU_QUEUE_SIZE];
 	int mmu_queue_len;
@@ -58,6 +67,8 @@  struct kvm_para_state {
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
 static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
+static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static int has_steal_clock = 0;
 
 static struct kvm_para_state *kvm_para_state(void)
 {
@@ -483,23 +494,66 @@  static struct notifier_block kvm_pv_reboot_nb = {
 	.notifier_call = kvm_pv_reboot_notify,
 };
 
+static void kvm_register_steal_time(void)
+{
+	int cpu = smp_processor_id();
+	struct kvm_steal_time *st = &per_cpu(steal_time, cpu);
+
+	if (!has_steal_clock)
+		return;
+
+	memset(st, 0, sizeof(*st));
+
+	wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED));
+	printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n",
+		cpu, __pa(st));
+}
+
+static u64 kvm_steal_clock(int cpu)
+{
+	u64 steal;
+	struct kvm_steal_time *src;
+	int version;
+
+	src = &per_cpu(steal_time, cpu);
+	do {
+		version = src->version;
+		rmb();
+		steal = src->steal;
+		rmb();
+	} while ((version & 1) || (version != src->version));
+
+	return steal;
+}
+
 #ifdef CONFIG_SMP
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
 #ifdef CONFIG_KVM_CLOCK
 	WARN_ON(kvm_register_clock("primary cpu clock"));
 #endif
+	kvm_register_steal_time();
 	kvm_guest_cpu_init();
 	native_smp_prepare_boot_cpu();
 }
 
 static void __cpuinit kvm_guest_cpu_online(void *dummy)
 {
+	kvm_register_steal_time();
 	kvm_guest_cpu_init();
 }
 
+void kvm_disable_steal_time(void)
+{
+	if (!has_steal_clock)
+		return;
+
+	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+}
+
 static void kvm_guest_cpu_offline(void *dummy)
 {
+	kvm_disable_steal_time();
 	kvm_pv_disable_apf(NULL);
 	apf_task_wake_all();
 }
@@ -548,6 +602,11 @@  void __init kvm_guest_init(void)
 	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
 		x86_init.irqs.trap_init = kvm_apf_trap_init;
 
+	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+		has_steal_clock = 1;
+		pv_time_ops.steal_clock = kvm_steal_clock;
+	}
+
 #ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 	register_cpu_notifier(&kvm_cpu_notifier);
@@ -555,3 +614,16 @@  void __init kvm_guest_init(void)
 	kvm_guest_cpu_init();
 #endif
 }
+
+static __init int activate_jump_labels(void)
+{
+	if (has_steal_clock) {
+		jump_label_inc(&paravirt_steal_enabled);
+		if (steal_acc)
+			jump_label_inc(&paravirt_steal_rq_enabled);
+	}
+
+	return 0;
+}
+arch_initcall(activate_jump_labels);
+
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 6389a6b..c1a0188 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -160,6 +160,7 @@  static void __cpuinit kvm_setup_secondary_clock(void)
 static void kvm_crash_shutdown(struct pt_regs *regs)
 {
 	native_write_msr(msr_kvm_system_time, 0, 0);
+	kvm_disable_steal_time();
 	native_machine_crash_shutdown(regs);
 }
 #endif
@@ -167,6 +168,7 @@  static void kvm_crash_shutdown(struct pt_regs *regs)
 static void kvm_shutdown(void)
 {
 	native_write_msr(msr_kvm_system_time, 0, 0);
+	kvm_disable_steal_time();
 	native_machine_shutdown();
 }