diff mbox

[v2,4/6] KVM: VMX: dynamise PLE window

Message ID 1408567997-21222-5-git-send-email-rkrcmar@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Radim Krčmář Aug. 20, 2014, 8:53 p.m. UTC
Window is increased on every PLE exit and decreased on every sched_in.
The idea is that we don't want to PLE exit if there is no preemption
going on.
We do this with sched_in() because it does not hold rq lock.

There are two new kernel parameters for changing the window:
 ple_window_grow and ple_window_shrink
ple_window_grow affects the window on PLE exit and ple_window_shrink
does it on sched_in;  depending on their value, the window is modifier
like this: (ple_window is kvm_intel's global)

  ple_window_shrink/ |
  ple_window_grow    | PLE exit           | sched_in
  -------------------+--------------------+---------------------
  < 1                |  = ple_window      |  = ple_window
  < ple_window       | *= ple_window_grow | /= ple_window_shrink
  otherwise          | += ple_window_grow | -= ple_window_shrink

A third new parameter, ple_window_max, controls a maximal ple_window.
A minimum equals to ple_window.

Signed-off-by: Radim Kr?má? <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini Aug. 21, 2014, 8:24 a.m. UTC | #1
Il 20/08/2014 22:53, Radim Kr?má? ha scritto:
> +static void update_ple_window_actual_max(void)
> +{
> +	ple_window_actual_max =
> +			__shrink_ple_window(max(ple_window_max, ple_window),

Why the max() here?

> +			                    ple_window_grow, INT_MIN);

This should be 0 (in fact you can probably make everything unsigned now
that you've sorted out the overflows).

Paolo

> +}
> +

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Aug. 21, 2014, 8:26 a.m. UTC | #2
Il 20/08/2014 22:53, Radim Kr?má? ha scritto:
> Window is increased on every PLE exit and decreased on every sched_in.
> The idea is that we don't want to PLE exit if there is no preemption
> going on.
> We do this with sched_in() because it does not hold rq lock.
> 
> There are two new kernel parameters for changing the window:
>  ple_window_grow and ple_window_shrink
> ple_window_grow affects the window on PLE exit and ple_window_shrink
> does it on sched_in;  depending on their value, the window is modifier
> like this: (ple_window is kvm_intel's global)
> 
>   ple_window_shrink/ |
>   ple_window_grow    | PLE exit           | sched_in
>   -------------------+--------------------+---------------------
>   < 1                |  = ple_window      |  = ple_window
>   < ple_window       | *= ple_window_grow | /= ple_window_shrink
>   otherwise          | += ple_window_grow | -= ple_window_shrink
> 
> A third new parameter, ple_window_max, controls a maximal ple_window.
> A minimum equals to ple_window.
> 
> Signed-off-by: Radim Kr?má? <rkrcmar@redhat.com>
> ---
>  arch/x86/kvm/vmx.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 78 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 18e0e52..e63d7ac 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO);
>   * Time is measured based on a counter that runs at the same rate as the TSC,
>   * refer SDM volume 3b section 21.6.13 & 22.1.3.
>   */
> -#define KVM_VMX_DEFAULT_PLE_GAP    128
> -#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
> +#define KVM_VMX_DEFAULT_PLE_GAP           128
> +#define KVM_VMX_DEFAULT_PLE_WINDOW        4096
> +#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW   2
> +#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
> +#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
> +		INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
> +
>  static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
>  module_param(ple_gap, int, S_IRUGO);
>  
>  static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
>  module_param(ple_window, int, S_IRUGO);
>  
> +/* Default doubles per-vcpu window every exit. */
> +static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
> +module_param(ple_window_grow, int, S_IRUGO);
> +
> +/* Default resets per-vcpu window every exit to ple_window. */
> +static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
> +module_param(ple_window_shrink, int, S_IRUGO);
> +
> +/* Default is to compute the maximum so we can never overflow. */
> +static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
> +static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
> +module_param(ple_window_max, int, S_IRUGO);
> +
>  extern const ulong vmx_return;
>  
>  #define NR_AUTOLOAD_MSRS 8
> @@ -5679,12 +5697,66 @@ out:
>  	return ret;
>  }
>  
> +static int __grow_ple_window(int val)
> +{
> +	if (ple_window_grow < 1)
> +		return ple_window;
> +
> +	val = min(val, ple_window_actual_max);
> +
> +	if (ple_window_grow < ple_window)
> +		val *= ple_window_grow;
> +	else
> +		val += ple_window_grow;
> +
> +	return val;
> +}
> +
> +static int __shrink_ple_window(int val, int shrinker, int minimum)

s/shrinker/factor/ or s/shrinker/param/ (shrinker has another meaning in
the kernel).

> +{
> +	if (shrinker < 1)
> +		return ple_window;
> +
> +	if (shrinker < ple_window)
> +		val /= shrinker;
> +	else
> +		val -= shrinker;
> +
> +	return max(val, minimum);

Any reason to use anything but ple_window as the minimum, even in
update_ple_window_actual_max?

> +}
> +
> +static void modify_ple_window(struct kvm_vcpu *vcpu, int grow)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	int new;
> +
> +	if (grow)
> +		new = __grow_ple_window(vmx->ple_window);
> +	else
> +		new = __shrink_ple_window(vmx->ple_window, ple_window_shrink,
> +		                          ple_window);
> +
> +	vmx->ple_window = max(new, ple_window);
> +}
> +#define grow_ple_window(vcpu)   modify_ple_window(vcpu, 1)
> +#define shrink_ple_window(vcpu) modify_ple_window(vcpu, 0)

No macros please. :)

Paolo

> +
> +static void update_ple_window_actual_max(void)
> +{
> +	ple_window_actual_max =
> +			__shrink_ple_window(max(ple_window_max, ple_window),
> +			                    ple_window_grow, INT_MIN);
> +}
> +
>  /*
>   * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
>   * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
>   */
>  static int handle_pause(struct kvm_vcpu *vcpu)
>  {
> +	if (ple_gap)
> +		grow_ple_window(vcpu);
> +
>  	skip_emulated_instruction(vcpu);
>  	kvm_vcpu_on_spin(vcpu);
>  
> @@ -8854,6 +8926,8 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
>  
>  void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
>  {
> +	if (ple_gap)
> +		shrink_ple_window(vcpu);
>  }
>  
>  static struct kvm_x86_ops vmx_x86_ops = {
> @@ -9077,6 +9151,8 @@ static int __init vmx_init(void)
>  	} else
>  		kvm_disable_tdp();
>  
> +	update_ple_window_actual_max();
> +
>  	return 0;
>  
>  out7:
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Aug. 21, 2014, 11:47 a.m. UTC | #3
2014-08-21 10:24+0200, Paolo Bonzini:
> Il 20/08/2014 22:53, Radim Kr?má? ha scritto:
> > +static void update_ple_window_actual_max(void)
> > +{
> > +	ple_window_actual_max =
> > +			__shrink_ple_window(max(ple_window_max, ple_window),
> 
> Why the max() here?

To have ple_window act as a ple_window_min as well.
(When we are already preventing most stupid choices.)

And it's cheap ... I can add comment to this function :)

> > +			                    ple_window_grow, INT_MIN);
> 
> This should be 0 (in fact you can probably make everything unsigned now
> that you've sorted out the overflows).

Not in v2:
  val = min(vmx->ple_window, ple_window_actual_max);
  val += ple_window_grow;
  vmx->ple_window = val;
so we need to dip below zero to allow all possible grows.

(I'm not sure if anyone is ever going to use the additive option, so
 getting rid of it is also a valid choice -- code would be nicer.)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Aug. 21, 2014, 11:54 a.m. UTC | #4
2014-08-21 10:26+0200, Paolo Bonzini:
> Il 20/08/2014 22:53, Radim Kr?má? ha scritto:
> > +static int __shrink_ple_window(int val, int shrinker, int minimum)
> 
> s/shrinker/factor/ or s/shrinker/param/ (shrinker has another meaning in
> the kernel).

True, thanks.

> > +{
> > +	if (shrinker < 1)
> > +		return ple_window;
> > +
> > +	if (shrinker < ple_window)
> > +		val /= shrinker;
> > +	else
> > +		val -= shrinker;
> > +
> > +	return max(val, minimum);
> 
> Any reason to use anything but ple_window as the minimum, even in
> update_ple_window_actual_max?

ple_window_actual_max needs to be one grow below the ple_window_max, so
it can be lower than ple_window.

> > +}
> > +
> > +static void modify_ple_window(struct kvm_vcpu *vcpu, int grow)
> > +{
> > +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +	int new;
> > +
> > +	if (grow)
> > +		new = __grow_ple_window(vmx->ple_window);
> > +	else
> > +		new = __shrink_ple_window(vmx->ple_window, ple_window_shrink,
> > +		                          ple_window);
> > +
> > +	vmx->ple_window = max(new, ple_window);
> > +}
> > +#define grow_ple_window(vcpu)   modify_ple_window(vcpu, 1)
> > +#define shrink_ple_window(vcpu) modify_ple_window(vcpu, 0)
> 
> No macros please. :)

Guity as charged.
Using 0/1 or true/false in this context directly would be pretty bad ...
Is enum fine? (SHIRINK_PLE_WINDOW, GROW_PLE_WINDOW?)

(I can always make it into function pointers ;)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Aug. 21, 2014, 12:29 p.m. UTC | #5
Il 21/08/2014 13:54, Radim Kr?má? ha scritto:
> Guity as charged.
> Using 0/1 or true/false in this context directly would be pretty bad ...
> Is enum fine? (SHIRINK_PLE_WINDOW, GROW_PLE_WINDOW?)

I prefer good old Ctrl-C Ctrl-V (adjusted for your favorite editor). :)

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Aug. 21, 2014, 12:42 p.m. UTC | #6
2014-08-21 14:29+0200, Paolo Bonzini:
> Il 21/08/2014 13:54, Radim Kr?má? ha scritto:
> > Guity as charged.
> > Using 0/1 or true/false in this context directly would be pretty bad ...
> > Is enum fine? (SHIRINK_PLE_WINDOW, GROW_PLE_WINDOW?)
> 
> I prefer good old Ctrl-C Ctrl-V (adjusted for your favorite editor). :)

I, I should be able to do it, but nightmares are going to last a while.

(And to lower chances of v4: are tracepoint macros, like for kvm_apic,
 frowned upon now?)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Aug. 21, 2014, 1:18 p.m. UTC | #7
Il 21/08/2014 14:42, Radim Kr?má? ha scritto:
> 2014-08-21 14:29+0200, Paolo Bonzini:
>> Il 21/08/2014 13:54, Radim Kr?má? ha scritto:
>>> Guity as charged.
>>> Using 0/1 or true/false in this context directly would be pretty bad ...
>>> Is enum fine? (SHIRINK_PLE_WINDOW, GROW_PLE_WINDOW?)
>>
>> I prefer good old Ctrl-C Ctrl-V (adjusted for your favorite editor). :)
> 
> I, I should be able to do it, but nightmares are going to last a while.
> 
> (And to lower chances of v4: are tracepoint macros, like for kvm_apic,
>  frowned upon now?)

Nah, go ahead.  But is there any example of #define like what you were
doing?  I tried grepping for "#define.*[a-z]\(" and didn't get anything
similar.

Paolo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Aug. 21, 2014, 1:46 p.m. UTC | #8
2014-08-21 15:18+0200, Paolo Bonzini:
> Il 21/08/2014 14:42, Radim Kr?má? ha scritto:
> > 2014-08-21 14:29+0200, Paolo Bonzini:
> >> Il 21/08/2014 13:54, Radim Kr?má? ha scritto:
> >>> Guity as charged.
> >>> Using 0/1 or true/false in this context directly would be pretty bad ...
> >>> Is enum fine? (SHIRINK_PLE_WINDOW, GROW_PLE_WINDOW?)
> >>
> >> I prefer good old Ctrl-C Ctrl-V (adjusted for your favorite editor). :)
> > 
> > I, I should be able to do it, but nightmares are going to last a while.
> > 
> > (And to lower chances of v4: are tracepoint macros, like for kvm_apic,
> >  frowned upon now?)
> 
> Nah, go ahead.  But is there any example of #define like what you were
> doing?  I tried grepping for "#define.*[a-z]\(" and didn't get anything
> similar.

No, at least not in KVM code, it was the same idea as tracepoints:
less magical constants at the cost of (syntactic) indirection.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 18e0e52..e63d7ac 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -125,14 +125,32 @@  module_param(nested, bool, S_IRUGO);
  * Time is measured based on a counter that runs at the same rate as the TSC,
  * refer SDM volume 3b section 21.6.13 & 22.1.3.
  */
-#define KVM_VMX_DEFAULT_PLE_GAP    128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_GAP           128
+#define KVM_VMX_DEFAULT_PLE_WINDOW        4096
+#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW   2
+#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
+		INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+
 static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
 module_param(ple_gap, int, S_IRUGO);
 
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
+/* Default doubles per-vcpu window every exit. */
+static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu window every exit to ple_window. */
+static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, int, S_IRUGO);
+
+/* Default is to compute the maximum so we can never overflow. */
+static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, int, S_IRUGO);
+
 extern const ulong vmx_return;
 
 #define NR_AUTOLOAD_MSRS 8
@@ -5679,12 +5697,66 @@  out:
 	return ret;
 }
 
+static int __grow_ple_window(int val)
+{
+	if (ple_window_grow < 1)
+		return ple_window;
+
+	val = min(val, ple_window_actual_max);
+
+	if (ple_window_grow < ple_window)
+		val *= ple_window_grow;
+	else
+		val += ple_window_grow;
+
+	return val;
+}
+
+static int __shrink_ple_window(int val, int shrinker, int minimum)
+{
+	if (shrinker < 1)
+		return ple_window;
+
+	if (shrinker < ple_window)
+		val /= shrinker;
+	else
+		val -= shrinker;
+
+	return max(val, minimum);
+}
+
+static void modify_ple_window(struct kvm_vcpu *vcpu, int grow)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int new;
+
+	if (grow)
+		new = __grow_ple_window(vmx->ple_window);
+	else
+		new = __shrink_ple_window(vmx->ple_window, ple_window_shrink,
+		                          ple_window);
+
+	vmx->ple_window = max(new, ple_window);
+}
+#define grow_ple_window(vcpu)   modify_ple_window(vcpu, 1)
+#define shrink_ple_window(vcpu) modify_ple_window(vcpu, 0)
+
+static void update_ple_window_actual_max(void)
+{
+	ple_window_actual_max =
+			__shrink_ple_window(max(ple_window_max, ple_window),
+			                    ple_window_grow, INT_MIN);
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
+	if (ple_gap)
+		grow_ple_window(vcpu);
+
 	skip_emulated_instruction(vcpu);
 	kvm_vcpu_on_spin(vcpu);
 
@@ -8854,6 +8926,8 @@  static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 
 void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
+	if (ple_gap)
+		shrink_ple_window(vcpu);
 }
 
 static struct kvm_x86_ops vmx_x86_ops = {
@@ -9077,6 +9151,8 @@  static int __init vmx_init(void)
 	} else
 		kvm_disable_tdp();
 
+	update_ple_window_actual_max();
+
 	return 0;
 
 out7: