diff mbox series

target/i386: Reset TSCs of parked vCPUs too on VM reset

Message ID 5a605a88e9a231386dc803c60f5fed9b48108139.1734014926.git.maciej.szmigiero@oracle.com (mailing list archive)
State New
Headers show
Series target/i386: Reset TSCs of parked vCPUs too on VM reset | expand

Commit Message

Maciej S. Szmigiero Dec. 12, 2024, 2:51 p.m. UTC
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>

Since commit 5286c3662294 ("target/i386: properly reset TSC on reset")
QEMU writes the special value of "1" to each online vCPU TSC on VM reset
to reset it.

However parked vCPUs don't get that handling and due to that their TSCs
get desynchronized when the VM gets reset.
This in turn causes KVM to turn off PVCLOCK_TSC_STABLE_BIT in its exported
PV clock.
Note that KVM has no understanding of vCPU being currently parked.

Without PVCLOCK_TSC_STABLE_BIT the sched clock is marked unstable in
the guest's kvm_sched_clock_init().
This causes a performance regressions to show in some tests.

Fix this issue by writing the special value of "1" also to TSCs of parked
vCPUs on VM reset.


Reproducing the issue:
1) Boot a VM with "-smp 2,maxcpus=3" or similar

2) device_add host-x86_64-cpu,id=vcpu,node-id=0,socket-id=0,core-id=2,thread-id=0

3) Wait a few seconds

4) device_del vcpu

5) Inside the VM run:
# echo "t" >/proc/sysrq-trigger; dmesg | grep sched_clock_stable
Observe the sched_clock_stable() value is 1.

6) Reboot the VM

7) Once the VM boots once again run inside it:
# echo "t" >/proc/sysrq-trigger; dmesg | grep sched_clock_stable
Observe the sched_clock_stable() value is now 0.


Fixes: 5286c3662294 ("target/i386: properly reset TSC on reset")
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
 accel/kvm/kvm-all.c                | 11 +++++++++++
 configs/targets/i386-softmmu.mak   |  1 +
 configs/targets/x86_64-softmmu.mak |  1 +
 include/sysemu/kvm.h               |  8 ++++++++
 target/i386/kvm/kvm.c              | 15 +++++++++++++++
 5 files changed, 36 insertions(+)

Comments

Paolo Bonzini Dec. 12, 2024, 3:57 p.m. UTC | #1
Queued, thanks.

Paolo
Igor Mammedov Dec. 12, 2024, 4 p.m. UTC | #2
On Thu, 12 Dec 2024 15:51:15 +0100
"Maciej S. Szmigiero" <mail@maciej.szmigiero.name> wrote:

> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
> 
> Since commit 5286c3662294 ("target/i386: properly reset TSC on reset")
> QEMU writes the special value of "1" to each online vCPU TSC on VM reset
> to reset it.
> 
> However parked vCPUs don't get that handling and due to that their TSCs
> get desynchronized when the VM gets reset.
> This in turn causes KVM to turn off PVCLOCK_TSC_STABLE_BIT in its exported
> PV clock.
> Note that KVM has no understanding of vCPU being currently parked.
> 
> Without PVCLOCK_TSC_STABLE_BIT the sched clock is marked unstable in
> the guest's kvm_sched_clock_init().
> This causes a performance regressions to show in some tests.
> 
> Fix this issue by writing the special value of "1" also to TSCs of parked
> vCPUs on VM reset.

does TSC still ticks when vCPU is parked or it is paused at some value?

> 
> 
> Reproducing the issue:
> 1) Boot a VM with "-smp 2,maxcpus=3" or similar
> 
> 2) device_add host-x86_64-cpu,id=vcpu,node-id=0,socket-id=0,core-id=2,thread-id=0
> 
> 3) Wait a few seconds
> 
> 4) device_del vcpu
> 
> 5) Inside the VM run:
> # echo "t" >/proc/sysrq-trigger; dmesg | grep sched_clock_stable
> Observe the sched_clock_stable() value is 1.
> 
> 6) Reboot the VM
> 
> 7) Once the VM boots once again run inside it:
> # echo "t" >/proc/sysrq-trigger; dmesg | grep sched_clock_stable
> Observe the sched_clock_stable() value is now 0.
> 
> 
> Fixes: 5286c3662294 ("target/i386: properly reset TSC on reset")
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
>  accel/kvm/kvm-all.c                | 11 +++++++++++
>  configs/targets/i386-softmmu.mak   |  1 +
>  configs/targets/x86_64-softmmu.mak |  1 +
>  include/sysemu/kvm.h               |  8 ++++++++
>  target/i386/kvm/kvm.c              | 15 +++++++++++++++
>  5 files changed, 36 insertions(+)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 801cff16a5a2..dec1d1c16a0d 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -437,6 +437,16 @@ int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
>      return kvm_fd;
>  }
>  
> +static void kvm_reset_parked_vcpus(void *param)
> +{
> +    KVMState *s = param;
> +    struct KVMParkedVcpu *cpu;
> +
> +    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
> +        kvm_arch_reset_parked_vcpu(cpu->vcpu_id, cpu->kvm_fd);
> +    }
> +}
> +
>  int kvm_create_vcpu(CPUState *cpu)
>  {
>      unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
> @@ -2728,6 +2738,7 @@ static int kvm_init(MachineState *ms)
>      }
>  
>      qemu_register_reset(kvm_unpoison_all, NULL);
> +    qemu_register_reset(kvm_reset_parked_vcpus, s);
>  
>      if (s->kernel_irqchip_allowed) {
>          kvm_irqchip_create(s);
> diff --git a/configs/targets/i386-softmmu.mak b/configs/targets/i386-softmmu.mak
> index 2ac69d5ba370..2eb0e8625005 100644
> --- a/configs/targets/i386-softmmu.mak
> +++ b/configs/targets/i386-softmmu.mak
> @@ -1,4 +1,5 @@
>  TARGET_ARCH=i386
>  TARGET_SUPPORTS_MTTCG=y
>  TARGET_KVM_HAVE_GUEST_DEBUG=y
> +TARGET_KVM_HAVE_RESET_PARKED_VCPU=y
>  TARGET_XML_FILES= gdb-xml/i386-32bit.xml
> diff --git a/configs/targets/x86_64-softmmu.mak b/configs/targets/x86_64-softmmu.mak
> index e12ac3dc59bf..920e9a42006f 100644
> --- a/configs/targets/x86_64-softmmu.mak
> +++ b/configs/targets/x86_64-softmmu.mak
> @@ -2,4 +2,5 @@ TARGET_ARCH=x86_64
>  TARGET_BASE_ARCH=i386
>  TARGET_SUPPORTS_MTTCG=y
>  TARGET_KVM_HAVE_GUEST_DEBUG=y
> +TARGET_KVM_HAVE_RESET_PARKED_VCPU=y
>  TARGET_XML_FILES= gdb-xml/i386-64bit.xml
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index c3a60b28909a..ab17c09a551f 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -377,6 +377,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s);
>  int kvm_arch_init_vcpu(CPUState *cpu);
>  int kvm_arch_destroy_vcpu(CPUState *cpu);
>  
> +#ifdef TARGET_KVM_HAVE_RESET_PARKED_VCPU
> +void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd);
> +#else
> +static inline void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd)
> +{
> +}
> +#endif
> +
>  bool kvm_vcpu_id_is_valid(int vcpu_id);
>  
>  /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 8e17942c3ba1..2ff618fbf138 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -2415,6 +2415,21 @@ void kvm_arch_after_reset_vcpu(X86CPU *cpu)
>      }
>  }
>  
> +void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd)
> +{
> +    g_autofree struct kvm_msrs *msrs = NULL;
> +
> +    msrs = g_malloc0(sizeof(*msrs) + sizeof(msrs->entries[0]));
> +    msrs->entries[0].index = MSR_IA32_TSC;
> +    msrs->entries[0].data = 1; /* match the value in x86_cpu_reset() */
> +    msrs->nmsrs++;
> +
> +    if (ioctl(kvm_fd, KVM_SET_MSRS, msrs) != 1) {
> +        warn_report("parked vCPU %lu TSC reset failed: %d",
> +                    vcpu_id, errno);
> +    }
> +}
> +
>  void kvm_arch_do_init_vcpu(X86CPU *cpu)
>  {
>      CPUX86State *env = &cpu->env;
>
Maciej S. Szmigiero Dec. 12, 2024, 9:32 p.m. UTC | #3
On 12.12.2024 17:00, Igor Mammedov wrote:
> On Thu, 12 Dec 2024 15:51:15 +0100
> "Maciej S. Szmigiero" <mail@maciej.szmigiero.name> wrote:
> 
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> Since commit 5286c3662294 ("target/i386: properly reset TSC on reset")
>> QEMU writes the special value of "1" to each online vCPU TSC on VM reset
>> to reset it.
>>
>> However parked vCPUs don't get that handling and due to that their TSCs
>> get desynchronized when the VM gets reset.
>> This in turn causes KVM to turn off PVCLOCK_TSC_STABLE_BIT in its exported
>> PV clock.
>> Note that KVM has no understanding of vCPU being currently parked.
>>
>> Without PVCLOCK_TSC_STABLE_BIT the sched clock is marked unstable in
>> the guest's kvm_sched_clock_init().
>> This causes a performance regressions to show in some tests.
>>
>> Fix this issue by writing the special value of "1" also to TSCs of parked
>> vCPUs on VM reset.
> 
> does TSC still ticks when vCPU is parked or it is paused at some value?
> 

A parked vCPUs isn't being scheduled/run so it's a bit of an
academic discussion whether its (virtual) TSC ticks or not.

TSC of a parked vCPU gets synced once again ("0" write) when it
gets unparked.

Thanks,
Maciej
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 801cff16a5a2..dec1d1c16a0d 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -437,6 +437,16 @@  int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
     return kvm_fd;
 }
 
+static void kvm_reset_parked_vcpus(void *param)
+{
+    KVMState *s = param;
+    struct KVMParkedVcpu *cpu;
+
+    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
+        kvm_arch_reset_parked_vcpu(cpu->vcpu_id, cpu->kvm_fd);
+    }
+}
+
 int kvm_create_vcpu(CPUState *cpu)
 {
     unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
@@ -2728,6 +2738,7 @@  static int kvm_init(MachineState *ms)
     }
 
     qemu_register_reset(kvm_unpoison_all, NULL);
+    qemu_register_reset(kvm_reset_parked_vcpus, s);
 
     if (s->kernel_irqchip_allowed) {
         kvm_irqchip_create(s);
diff --git a/configs/targets/i386-softmmu.mak b/configs/targets/i386-softmmu.mak
index 2ac69d5ba370..2eb0e8625005 100644
--- a/configs/targets/i386-softmmu.mak
+++ b/configs/targets/i386-softmmu.mak
@@ -1,4 +1,5 @@ 
 TARGET_ARCH=i386
 TARGET_SUPPORTS_MTTCG=y
 TARGET_KVM_HAVE_GUEST_DEBUG=y
+TARGET_KVM_HAVE_RESET_PARKED_VCPU=y
 TARGET_XML_FILES= gdb-xml/i386-32bit.xml
diff --git a/configs/targets/x86_64-softmmu.mak b/configs/targets/x86_64-softmmu.mak
index e12ac3dc59bf..920e9a42006f 100644
--- a/configs/targets/x86_64-softmmu.mak
+++ b/configs/targets/x86_64-softmmu.mak
@@ -2,4 +2,5 @@  TARGET_ARCH=x86_64
 TARGET_BASE_ARCH=i386
 TARGET_SUPPORTS_MTTCG=y
 TARGET_KVM_HAVE_GUEST_DEBUG=y
+TARGET_KVM_HAVE_RESET_PARKED_VCPU=y
 TARGET_XML_FILES= gdb-xml/i386-64bit.xml
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index c3a60b28909a..ab17c09a551f 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -377,6 +377,14 @@  int kvm_arch_init(MachineState *ms, KVMState *s);
 int kvm_arch_init_vcpu(CPUState *cpu);
 int kvm_arch_destroy_vcpu(CPUState *cpu);
 
+#ifdef TARGET_KVM_HAVE_RESET_PARKED_VCPU
+void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd);
+#else
+static inline void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd)
+{
+}
+#endif
+
 bool kvm_vcpu_id_is_valid(int vcpu_id);
 
 /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 8e17942c3ba1..2ff618fbf138 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2415,6 +2415,21 @@  void kvm_arch_after_reset_vcpu(X86CPU *cpu)
     }
 }
 
+void kvm_arch_reset_parked_vcpu(unsigned long vcpu_id, int kvm_fd)
+{
+    g_autofree struct kvm_msrs *msrs = NULL;
+
+    msrs = g_malloc0(sizeof(*msrs) + sizeof(msrs->entries[0]));
+    msrs->entries[0].index = MSR_IA32_TSC;
+    msrs->entries[0].data = 1; /* match the value in x86_cpu_reset() */
+    msrs->nmsrs++;
+
+    if (ioctl(kvm_fd, KVM_SET_MSRS, msrs) != 1) {
+        warn_report("parked vCPU %lu TSC reset failed: %d",
+                    vcpu_id, errno);
+    }
+}
+
 void kvm_arch_do_init_vcpu(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;