From patchwork Wed Aug 20 20:53:15 2014
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Patchwork-Submitter: =?utf-8?b?UmFkaW0gS3LEjW3DocWZ?= <rkrcmar@redhat.com>
X-Patchwork-Id: 4753901
Return-Path: <kvm-owner@kernel.org>
X-Original-To: patchwork-kvm@patchwork.kernel.org
Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org
Received: from mail.kernel.org (mail.kernel.org [198.145.19.201])
	by patchwork1.web.kernel.org (Postfix) with ESMTP id E33199F38D
	for <patchwork-kvm@patchwork.kernel.org>;
	Wed, 20 Aug 2014 20:55:33 +0000 (UTC)
Received: from mail.kernel.org (localhost [127.0.0.1])
	by mail.kernel.org (Postfix) with ESMTP id 00619200E7
	for <patchwork-kvm@patchwork.kernel.org>;
	Wed, 20 Aug 2014 20:55:33 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by mail.kernel.org (Postfix) with ESMTP id A8F7A2010B
	for <patchwork-kvm@patchwork.kernel.org>;
	Wed, 20 Aug 2014 20:55:27 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753129AbaHTUyR (ORCPT
	<rfc822;patchwork-kvm@patchwork.kernel.org>);
	Wed, 20 Aug 2014 16:54:17 -0400
Received: from mx1.redhat.com ([209.132.183.28]:62649 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1752936AbaHTUyP (ORCPT <rfc822;kvm@vger.kernel.org>);
	Wed, 20 Aug 2014 16:54:15 -0400
Received: from int-mx14.intmail.prod.int.phx2.redhat.com
	(int-mx14.intmail.prod.int.phx2.redhat.com [10.5.11.27])
	by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id s7KKs7lK025274
	(version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-GCM-SHA384 bits=256
	verify=OK); Wed, 20 Aug 2014 16:54:07 -0400
Received: from potion (dhcp-1-113.brq.redhat.com [10.34.1.113])
	by int-mx14.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with
	SMTP id s7KKs4re013993; Wed, 20 Aug 2014 16:54:04 -0400
Received: by potion (sSMTP sendmail emulation);
	Wed, 20 Aug 2014 22:54:03 +0200
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
To: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Paolo Bonzini <pbonzini@redhat.com>,
	Gleb Natapov <gleb@kernel.org>,
	Raghavendra KT <raghavendra.kt@linux.vnet.ibm.com>,
	Vinod Chegu <chegu_vinod@hp.com>, Hui-Zhi <hui-zhi.zhao@hp.com>,
	Christian Borntraeger <borntraeger@de.ibm.com>
Subject: [PATCH v2 4/6] KVM: VMX: dynamise PLE window
Date: Wed, 20 Aug 2014 22:53:15 +0200
Message-Id: <1408567997-21222-5-git-send-email-rkrcmar@redhat.com>
In-Reply-To: <1408567997-21222-1-git-send-email-rkrcmar@redhat.com>
References: <1408567997-21222-1-git-send-email-rkrcmar@redhat.com>
MIME-Version: 1.0
X-Scanned-By: MIMEDefang 2.68 on 10.5.11.27
Sender: kvm-owner@vger.kernel.org
Precedence: bulk
List-ID: <kvm.vger.kernel.org>
X-Mailing-List: kvm@vger.kernel.org
X-Spam-Status: No, score=-7.6 required=5.0 tests=BAYES_00,HK_RANDOM_FROM,
	RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD,
	UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org
X-Virus-Scanned: ClamAV using ClamSMTP

Window is increased on every PLE exit and decreased on every sched_in.
The idea is that we don't want to PLE exit if there is no preemption
going on.
We do this with sched_in() because it does not hold rq lock.

There are two new kernel parameters for changing the window:
 ple_window_grow and ple_window_shrink
ple_window_grow affects the window on PLE exit and ple_window_shrink
does it on sched_in;  depending on their value, the window is modifier
like this: (ple_window is kvm_intel's global)

  ple_window_shrink/ |
  ple_window_grow    | PLE exit           | sched_in
  -------------------+--------------------+---------------------
  < 1                |  = ple_window      |  = ple_window
  < ple_window       | *= ple_window_grow | /= ple_window_shrink
  otherwise          | += ple_window_grow | -= ple_window_shrink

A third new parameter, ple_window_max, controls a maximal ple_window.
A minimum equals to ple_window.

Signed-off-by: Radim Kr?má? <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 78 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 18e0e52..e63d7ac 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO);
  * Time is measured based on a counter that runs at the same rate as the TSC,
  * refer SDM volume 3b section 21.6.13 & 22.1.3.
  */
-#define KVM_VMX_DEFAULT_PLE_GAP    128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_GAP           128
+#define KVM_VMX_DEFAULT_PLE_WINDOW        4096
+#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW   2
+#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
+		INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+
 static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
 module_param(ple_gap, int, S_IRUGO);
 
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
+/* Default doubles per-vcpu window every exit. */
+static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu window every exit to ple_window. */
+static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, int, S_IRUGO);
+
+/* Default is to compute the maximum so we can never overflow. */
+static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, int, S_IRUGO);
+
 extern const ulong vmx_return;
 
 #define NR_AUTOLOAD_MSRS 8
@@ -5679,12 +5697,66 @@ out:
 	return ret;
 }
 
+static int __grow_ple_window(int val)
+{
+	if (ple_window_grow < 1)
+		return ple_window;
+
+	val = min(val, ple_window_actual_max);
+
+	if (ple_window_grow < ple_window)
+		val *= ple_window_grow;
+	else
+		val += ple_window_grow;
+
+	return val;
+}
+
+static int __shrink_ple_window(int val, int shrinker, int minimum)
+{
+	if (shrinker < 1)
+		return ple_window;
+
+	if (shrinker < ple_window)
+		val /= shrinker;
+	else
+		val -= shrinker;
+
+	return max(val, minimum);
+}
+
+static void modify_ple_window(struct kvm_vcpu *vcpu, int grow)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int new;
+
+	if (grow)
+		new = __grow_ple_window(vmx->ple_window);
+	else
+		new = __shrink_ple_window(vmx->ple_window, ple_window_shrink,
+		                          ple_window);
+
+	vmx->ple_window = max(new, ple_window);
+}
+#define grow_ple_window(vcpu)   modify_ple_window(vcpu, 1)
+#define shrink_ple_window(vcpu) modify_ple_window(vcpu, 0)
+
+static void update_ple_window_actual_max(void)
+{
+	ple_window_actual_max =
+			__shrink_ple_window(max(ple_window_max, ple_window),
+			                    ple_window_grow, INT_MIN);
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
+	if (ple_gap)
+		grow_ple_window(vcpu);
+
 	skip_emulated_instruction(vcpu);
 	kvm_vcpu_on_spin(vcpu);
 
@@ -8854,6 +8926,8 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 
 void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
+	if (ple_gap)
+		shrink_ple_window(vcpu);
 }
 
 static struct kvm_x86_ops vmx_x86_ops = {
@@ -9077,6 +9151,8 @@ static int __init vmx_init(void)
 	} else
 		kvm_disable_tdp();
 
+	update_ple_window_actual_max();
+
 	return 0;
 
 out7: