@@ -257,4 +257,6 @@ struct kvm_arch{
};
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
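+/*
+ * PLE window scaling does not apply to s390; these no-op stubs keep the
+ * generic kvm_vcpu_on_spin() callers building.
+ */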
+static inline void kvm_inc_ple_window(void) {}
+static inline void kvm_dec_ple_window(void) {}
#endif
@@ -707,6 +707,8 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
+ void (*inc_ple_window)(void);
+ void (*dec_ple_window)(void);
};
struct kvm_arch_async_pf {
@@ -1007,5 +1009,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
+void kvm_inc_ple_window(void);
+void kvm_dec_ple_window(void);
#endif /* _ASM_X86_KVM_HOST_H */
@@ -4220,6 +4220,14 @@ out:
return ret;
}
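+/*
+ * SVM's PAUSE filter count is not scaled dynamically here, so these
+ * callbacks are no-ops for now.
+ */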
+static void svm_inc_ple_window(void)
+{
+}
+
+static void svm_dec_ple_window(void)
+{
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -4310,6 +4318,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tdp_cr3 = set_tdp_cr3,
.check_intercept = svm_check_intercept,
+ .inc_ple_window = svm_inc_ple_window,
+ .dec_ple_window = svm_dec_ple_window,
};
static int __init svm_init(void)
@@ -115,12 +115,17 @@ module_param(nested, bool, S_IRUGO);
* According to test, this time is usually smaller than 128 cycles.
* ple_window: upper bound on the amount of time a guest is allowed to execute
* in a PAUSE loop. Tests indicate that most spinlocks are held for
- * less than 2^12 cycles
+ * less than 2^12 cycles. The default is raised to 2^14, however, to
+ * reduce PLE exit overhead in uncontended cases.
* Time is measured based on a counter that runs at the same rate as the TSC,
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
#define KVM_VMX_DEFAULT_PLE_GAP 128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_WINDOW 16384
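+/* Bounds and step size for dynamic scaling of ple_window. */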
+#define KVM_VMX_MAX_PLE_WINDOW 16384
+#define KVM_VMX_MIN_PLE_WINDOW 4096
+#define KVM_VMX_PLE_WINDOW_DELTA 1024
+
static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
module_param(ple_gap, int, S_IRUGO);
@@ -7149,6 +7154,27 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->host_ia32_perf_global_ctrl);
}
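+/*
+ * Scale ple_window within [KVM_VMX_MIN_PLE_WINDOW, KVM_VMX_MAX_PLE_WINDOW]:
+ * grow by a full delta, shrink by a quarter delta, and write the new value
+ * into the current VMCS. No-op when PLE is disabled (ple_gap == 0).
+ */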
+static inline void vmx_inc_ple_window(void)
+{
+ if (ple_gap) {
+ ple_window = min(KVM_VMX_MAX_PLE_WINDOW,
+ ple_window + KVM_VMX_PLE_WINDOW_DELTA);
+ vmcs_write32(PLE_WINDOW, ple_window);
+ }
+}
+
+static inline void vmx_dec_ple_window(void)
+{
+ if (ple_gap) {
+ ple_window = max(KVM_VMX_MIN_PLE_WINDOW,
+ ple_window - (KVM_VMX_PLE_WINDOW_DELTA >> 2));
+ vmcs_write32(PLE_WINDOW, ple_window);
+ }
+}
+
/*
* Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
* and modify vmcs12 to make it see what it would expect to see there if
@@ -7314,6 +7340,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_tdp_cr3 = vmx_set_cr3,
.check_intercept = vmx_check_intercept,
+ .inc_ple_window = vmx_inc_ple_window,
+ .dec_ple_window = vmx_dec_ple_window,
};
static int __init vmx_init(void)
@@ -6052,6 +6052,16 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return r;
}
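+/* Let generic code adjust the PLE window through kvm_x86_ops. */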
+void kvm_inc_ple_window(void)
+{
+ kvm_x86_ops->inc_ple_window();
+}
+
+void kvm_dec_ple_window(void)
+{
+ kvm_x86_ops->dec_ple_window();
+}
+
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
int r;
@@ -1731,15 +1731,20 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
yielded = kvm_vcpu_yield_to(vcpu);
if (yielded > 0) {
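+ /* Directed yield succeeded: shrink the PLE window. */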
+ kvm_dec_ple_window();
kvm->last_boosted_vcpu = i;
break;
} else if (yielded < 0) {
try--;
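+ /* Directed yield failed: grow the PLE window. */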
+ kvm_inc_ple_window();
if (!try)
break;
}
}
}
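+ /* No vcpu was boosted this round: grow the PLE window. */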
+ if (!yielded)
+ kvm_inc_ple_window();
+
kvm_vcpu_set_in_spin_loop(me, false);
/* Ensure vcpu is not eligible during next spinloop */