diff mbox

[v5,untested] kvm: better MWAIT emulation for guests

Message ID 20170316172243.GE14076@potion (mailing list archive)
State New, archived
Headers show

Commit Message

Radim Krčmář March 16, 2017, 5:22 p.m. UTC
2017-03-16 12:47-0400, Gabriel L. Somlo:
> On Thu, Mar 16, 2017 at 05:01:58PM +0100, Radim Krčmář wrote:
> > 2017-03-16 16:35+0100, Radim Krčmář:
> > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > >> The intel manual said the same thing back in 2010 as well. However,
> > >> regardless of how any flags were set, interrupt-window exiting or not,
> > >> "normal" L1 MWAIT behavior was that it woke up immediately regardless.
> > >> Remember, never going to sleep is still correct ("normal" ?) behavior
> > >> per the ISA definition of MWAIT :)
> > > 
> > > I'll write a simple kvm-unit-test to better understand why it is broken
> > > for you ...
> > 
> > Please get git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git
> > 
> > and try this, thanks!
> > 
> > ---8<---
> > x86/mwait: crappy test
> > 
> > `./configure && make` to build it, then follow the comment in the code to
> > try a few cases.
> 
> kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
> timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 1
> enabling apic
> PASS: resumed from mwait 10000 times
> SUMMARY: 1 tests
> 
> real    0m10.564s
> user    0m10.339s
> sys     0m0.225s
> 
> 
> and
> 
> kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
> timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 0
> enabling apic
> PASS: resumed from mwait 10000 times
> SUMMARY: 1 tests
> 
> real    0m0.746s
> user    0m0.555s
> sys     0m0.200s
> 
> Both of these with Michael's v5 patch applied, on the MacPro1,1.
> 
> Similar behavior (0 1 1 takes 10 seconds, 0 1 0 returns immediately)
> on the macbook air.
> 
> If I revert to the original (nop-emulated MWAIT) kvm source, I get
> both versions to return immediately.

Those look normal ... maybe MWAIT just ignores writes to the monitored
area?

Please apply the patch below and try the following:

  time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1' -smp 2
  time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 1' -smp 2
  time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 0' -smp 2

All of them should take roughly the same time as the NOP one,

  time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0' -smp 2

Thanks.

---8<---

Comments

Gabriel L. Somlo March 16, 2017, 5:39 p.m. UTC | #1
On Thu, Mar 16, 2017 at 06:22:44PM +0100, Radim Krčmář wrote:
> 2017-03-16 12:47-0400, Gabriel L. Somlo:
> > On Thu, Mar 16, 2017 at 05:01:58PM +0100, Radim Krčmář wrote:
> > > 2017-03-16 16:35+0100, Radim Krčmář:
> > > > 2017-03-16 10:58-0400, Gabriel L. Somlo:
> > > >> The intel manual said the same thing back in 2010 as well. However,
> > > >> regardless of how any flags were set, interrupt-window exiting or not,
> > > >> "normal" L1 MWAIT behavior was that it woke up immediately regardless.
> > > >> Remember, never going to sleep is still correct ("normal" ?) behavior
> > > >> per the ISA definition of MWAIT :)
> > > > 
> > > > I'll write a simple kvm-unit-test to better understand why it is broken
> > > > for you ...
> > > 
> > > Please get git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git
> > > 
> > > and try this, thanks!
> > > 
> > > ---8<---
> > > x86/mwait: crappy test
> > > 
> > > `./configure && make` to build it, then follow the comment in the code to
> > > try a few cases.
> > 
> > kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1'
> > timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 1
> > enabling apic
> > PASS: resumed from mwait 10000 times
> > SUMMARY: 1 tests
> > 
> > real    0m10.564s
> > user    0m10.339s
> > sys     0m0.225s
> > 
> > 
> > and
> > 
> > kvm-unit-tests]$ time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0'
> > timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 0
> > enabling apic
> > PASS: resumed from mwait 10000 times
> > SUMMARY: 1 tests
> > 
> > real    0m0.746s
> > user    0m0.555s
> > sys     0m0.200s
> > 
> > Both of these with Michael's v5 patch applied, on the MacPro1,1.
> > 
> > Similar behavior (0 1 1 takes 10 seconds, 0 1 0 returns immediately)
> > on the macbook air.
> > 
> > If I revert to the original (nop-emulated MWAIT) kvm source, I get
> > both versions to return immediately.
> 
> Those look normal ... maybe MWAIT just ignores writes to the monitored
> area?
> 
> Please apply the patch below and try the following:
> 
>   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 1' -smp 2

timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device pci-testdev -kernel x86/mwait.flat -append 0 1 1 -smp 2
enabling apic
enabling apic
PASS: resumed from mwait 10000 times
SUMMARY: 1 tests

real    0m0.758s
user    0m0.557s
sys     0m0.220s

>   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 1' -smp 2

timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device pci-testdev -kernel x86/mwait.flat -append 0 0 1 -smp 2
enabling apic
enabling apic
PASS: resumed from mwait 10000 times
SUMMARY: 1 tests

real    0m0.748s
user    0m0.550s
sys     0m0.210s

>   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 0 0' -smp 2

timeout -k 1s --foreground 20 qemu-kvm -nodefaults -enable-kvm -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device pci-testdev -kernel x86/mwait.flat -append 0 0 0 -smp 2
enabling apic
enabling apic
PASS: resumed from mwait 10000 times
SUMMARY: 1 tests

real    0m0.745s
user    0m0.558s
sys     0m0.203s

> 
> All of them should take roughly the same time as the NOP one,
> 
>   time TIMEOUT=20 ./x86-run x86/mwait.flat -append '0 1 0' -smp 2

They all *did* return fast, as you expected.

> ---8<---
> diff --git a/x86/mwait.c b/x86/mwait.c
> index c21dab5cc97d..ca38e7223596 100644
> --- a/x86/mwait.c
> +++ b/x86/mwait.c
> @@ -1,7 +1,9 @@
>  #include "vm.h"
> +#include "smp.h"
>  
>  #define TARGET_RESUMES 10000
>  volatile unsigned page[4096 / 4];
> +volatile unsigned resumes;
>  
>  /*
>   * Execute
> @@ -18,19 +20,39 @@ volatile unsigned page[4096 / 4];
>   * Getting killed by the TIMEOUT most likely means that you have different HZ,
>   * but could also be a bug ...
>   */
> +void writer(void *null)
> +{
> +	int i;
> +	unsigned old_resumes = 0, new_resumes;
> +
> +	for (i = 0; i < TARGET_RESUMES; i++) {
> +		(*page)++;
> +
> +		while (old_resumes == (new_resumes = resumes))
> +			pause();
> +		old_resumes = new_resumes;
> +	}
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	uint32_t eax = atol(argv[1]);
>  	uint32_t ecx = atol(argv[2]);
>  	bool sti = atol(argv[3]);
> -	unsigned resumes = 0;
> +	bool smp;
> +
> +	smp_init();
> +	smp = cpu_count() > 1;
> +
> +	if (smp)
> +		on_cpu_async(1, writer, NULL);
>  
>  	if (sti)
>  		asm volatile ("sti");
>  	else
>  		asm volatile ("cli");
>  
> -	while (resumes < TARGET_RESUMES) {
> +	while ((smp ? *page : resumes) < TARGET_RESUMES) {
>  		asm volatile("monitor" :: "a" (page), "c" (0), "d" (0));
>  		asm volatile("mwait" :: "a" (eax), "c" (ecx));
>  		resumes++;
diff mbox

Patch

diff --git a/x86/mwait.c b/x86/mwait.c
index c21dab5cc97d..ca38e7223596 100644
--- a/x86/mwait.c
+++ b/x86/mwait.c
@@ -1,7 +1,9 @@ 
 #include "vm.h"
+#include "smp.h"
 
 #define TARGET_RESUMES 10000
 volatile unsigned page[4096 / 4];
+volatile unsigned resumes;
 
 /*
  * Execute
@@ -18,19 +20,39 @@  volatile unsigned page[4096 / 4];
  * Getting killed by the TIMEOUT most likely means that you have different HZ,
  * but could also be a bug ...
  */
+void writer(void *null)
+{
+	int i;
+	unsigned old_resumes = 0, new_resumes;
+
+	for (i = 0; i < TARGET_RESUMES; i++) {
+		(*page)++;
+
+		while (old_resumes == (new_resumes = resumes))
+			pause();
+		old_resumes = new_resumes;
+	}
+}
+
 int main(int argc, char **argv)
 {
 	uint32_t eax = atol(argv[1]);
 	uint32_t ecx = atol(argv[2]);
 	bool sti = atol(argv[3]);
-	unsigned resumes = 0;
+	bool smp;
+
+	smp_init();
+	smp = cpu_count() > 1;
+
+	if (smp)
+		on_cpu_async(1, writer, NULL);
 
 	if (sti)
 		asm volatile ("sti");
 	else
 		asm volatile ("cli");
 
-	while (resumes < TARGET_RESUMES) {
+	while ((smp ? *page : resumes) < TARGET_RESUMES) {
 		asm volatile("monitor" :: "a" (page), "c" (0), "d" (0));
 		asm volatile("mwait" :: "a" (eax), "c" (ecx));
 		resumes++;