
[RFC,2/2] cpus: Fix throttling during vm_stop

Message ID 20190710092338.23559-3-yury-kotov@yandex-team.ru (mailing list archive)
State New, archived
Series High downtime with 95+ throttle pct

Commit Message

Yury Kotov July 10, 2019, 9:23 a.m. UTC
The throttling code sleeps in the VCPU thread. For a high throttle percentage
this sleep is longer than 10ms; e.g. it is 15ms for 60% and 990ms for 99%.
vm_stop() kicks all VCPUs and waits for them. It is called at the end of
migration, and because of the long sleep the migration downtime can be
more than 100ms even with a downtime-limit of 1ms.
Use qemu_cond_timedwait for high percentages so the VCPU thread can wake up
during vm_stop.

Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
---
 cpus.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)
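
To make the numbers above concrete, here is a small standalone C sketch of the
sleep-time formula used by cpu_throttle_thread(), assuming the 10 ms
CPU_THROTTLE_TIMESLICE_NS that the figures above imply. It is purely
illustrative and not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define CPU_THROTTLE_TIMESLICE_NS 10000000LL   /* assumed 10 ms timeslice */

    int main(void)
    {
        int percentages[] = { 60, 95, 99 };

        for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); i++) {
            double pct = percentages[i] / 100.0;
            double throttle_ratio = pct / (1 - pct);
            /* +1 ns guards against results like 0.999999... truncating low */
            int64_t sleeptime_ns =
                (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
            printf("%d%% throttle -> sleep %.1f ms per 10 ms timeslice\n",
                   percentages[i], sleeptime_ns / 1e6);
        }
        return 0;
    }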

Comments

Yury Kotov July 15, 2019, 9:40 a.m. UTC | #1
Hi,

10.07.2019, 12:26, "Yury Kotov" <yury-kotov@yandex-team.ru>:
> Throttling thread sleeps in VCPU thread. For high throttle percentage
> this sleep is more than 10ms. E.g. for 60% - 15ms, for 99% - 990ms.
> vm_stop() kicks all VCPUs and waits for them. It's called at the end of
> migration and because of the long sleep the migration downtime might be
> more than 100ms even for downtime-limit 1ms.
> Use qemu_cond_timedwait for high percentage to wake up during vm_stop.
>
> Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
> ---
>  cpus.c | 27 +++++++++++++++++++--------
>  1 file changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index ffc57119ca..3c069cdc33 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -74,6 +74,8 @@
>
>  #endif /* CONFIG_LINUX */
>
> +static QemuMutex qemu_global_mutex;
> +
>  int64_t max_delay;
>  int64_t max_advance;
>
> @@ -776,7 +778,7 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
>  {
>      double pct;
>      double throttle_ratio;
> - long sleeptime_ns;
> + int64_t sleeptime_ns;
>
>      if (!cpu_throttle_get_percentage()) {
>          return;
> @@ -784,11 +786,22 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
>
>      pct = (double)cpu_throttle_get_percentage()/100;
>      throttle_ratio = pct / (1 - pct);
> - sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
> -
> - qemu_mutex_unlock_iothread();
> - g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
> - qemu_mutex_lock_iothread();
> + /* Add 1ns to fix double's rounding error (like 0.9999999...) */
> + sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
> +
> + while (sleeptime_ns >= SCALE_MS && !cpu->stop) {
> + int64_t start, end;
> + start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> + qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,

Paolo, Richard, please tell me what you think.
I'm not sure whether it's correct to use qemu_cond_timedwait() here.
I see that qemu_cond_timedwait()/qemu_cond_wait() and
qemu_mutex_(un)lock_iothread() behave differently in some cases.
But there are similar uses of qemu_cond_wait() with halt_cond, so maybe
it's OK to use qemu_cond_timedwait() here too.

> + sleeptime_ns / SCALE_MS);
> + end = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> + sleeptime_ns -= end - start;
> + }
> + if (sleeptime_ns >= SCALE_US && !cpu->stop) {
> + qemu_mutex_unlock_iothread();
> + g_usleep(sleeptime_ns / SCALE_US);
> + qemu_mutex_lock_iothread();
> + }
>      atomic_set(&cpu->throttle_thread_scheduled, 0);
>  }
>
> @@ -1166,8 +1179,6 @@ static void qemu_init_sigbus(void)
>  }
>  #endif /* !CONFIG_LINUX */
>
> -static QemuMutex qemu_global_mutex;
> -
>  static QemuThread io_thread;
>
>  /* cpu creation */
> --
> 2.22.0

Regards,
Yury
Paolo Bonzini July 15, 2019, 11 a.m. UTC | #2
On 15/07/19 11:40, Yury Kotov wrote:
> Hi,
> 
> 10.07.2019, 12:26, "Yury Kotov" <yury-kotov@yandex-team.ru>:
>> Throttling thread sleeps in VCPU thread. For high throttle percentage
>> this sleep is more than 10ms. E.g. for 60% - 15ms, for 99% - 990ms.
>> vm_stop() kicks all VCPUs and waits for them. It's called at the end of
>> migration and because of the long sleep the migration downtime might be
>> more than 100ms even for downtime-limit 1ms.
>> Use qemu_cond_timedwait for high percentage to wake up during vm_stop.
>>
>> Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
>> ---
>>  cpus.c | 27 +++++++++++++++++++--------
>>  1 file changed, 19 insertions(+), 8 deletions(-)
>>
>> diff --git a/cpus.c b/cpus.c
>> index ffc57119ca..3c069cdc33 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -74,6 +74,8 @@
>>
>>  #endif /* CONFIG_LINUX */
>>
>> +static QemuMutex qemu_global_mutex;
>> +
>>  int64_t max_delay;
>>  int64_t max_advance;
>>
>> @@ -776,7 +778,7 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
>>  {
>>      double pct;
>>      double throttle_ratio;
>> - long sleeptime_ns;
>> + int64_t sleeptime_ns;
>>
>>      if (!cpu_throttle_get_percentage()) {
>>          return;
>> @@ -784,11 +786,22 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
>>
>>      pct = (double)cpu_throttle_get_percentage()/100;
>>      throttle_ratio = pct / (1 - pct);
>> - sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
>> -
>> - qemu_mutex_unlock_iothread();
>> - g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
>> - qemu_mutex_lock_iothread();
>> + /* Add 1ns to fix double's rounding error (like 0.9999999...) */
>> + sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
>> +
>> + while (sleeptime_ns >= SCALE_MS && !cpu->stop) {
>> + int64_t start, end;
>> + start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>> + qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
> 
> Paolo, Richard, please tell me what you think.
> I'm not sure is it correct to use qemu_cond_timedwait() here?
> I see that qemu_cond_timedwait()/qemu_cond_wait() and
> qemu_mutex_(un)lock_iothread() have a different behavior in some cases.
> But there are some similar using of qemu_cond_wait with halt_cond, so may be
> it's ok to use qemu_cond_timedwait() here too.

Back in the day, Windows didn't have condition variables and making the
implementation robust and efficient was a mess---so there was no
qemu_cond_timedwait.  Semaphores are also a wee bit more scalable, so
qemu_sem_timedwait was introduced.

Now, I don't think it's an issue to add qemu_cond_timedwait.

Thanks,

Paolo

> 
>> + sleeptime_ns / SCALE_MS);
>> + end = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>> + sleeptime_ns -= end - start;
>> + }
>> + if (sleeptime_ns >= SCALE_US && !cpu->stop) {
>> + qemu_mutex_unlock_iothread();
>> + g_usleep(sleeptime_ns / SCALE_US);
>> + qemu_mutex_lock_iothread();
>> + }
>>      atomic_set(&cpu->throttle_thread_scheduled, 0);
>>  }
>>
>> @@ -1166,8 +1179,6 @@ static void qemu_init_sigbus(void)
>>  }
>>  #endif /* !CONFIG_LINUX */
>>
>> -static QemuMutex qemu_global_mutex;
>> -
>>  static QemuThread io_thread;
>>
>>  /* cpu creation */
>> --
>> 2.22.0
> 
> Regards,
> Yury
>
Yury Kotov July 15, 2019, 12:36 p.m. UTC | #3
15.07.2019, 14:00, "Paolo Bonzini" <pbonzini@redhat.com>:
> On 15/07/19 11:40, Yury Kotov wrote:
>>  Hi,
>>
>>  10.07.2019, 12:26, "Yury Kotov" <yury-kotov@yandex-team.ru>:
>>>  Throttling thread sleeps in VCPU thread. For high throttle percentage
>>>  this sleep is more than 10ms. E.g. for 60% - 15ms, for 99% - 990ms.
>>>  vm_stop() kicks all VCPUs and waits for them. It's called at the end of
>>>  migration and because of the long sleep the migration downtime might be
>>>  more than 100ms even for downtime-limit 1ms.
>>>  Use qemu_cond_timedwait for high percentage to wake up during vm_stop.
>>>
>>>  Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
>>>  ---
>>>   cpus.c | 27 +++++++++++++++++++--------
>>>   1 file changed, 19 insertions(+), 8 deletions(-)
>>>
>>>  diff --git a/cpus.c b/cpus.c
>>>  index ffc57119ca..3c069cdc33 100644
>>>  --- a/cpus.c
>>>  +++ b/cpus.c
>>>  @@ -74,6 +74,8 @@
>>>
>>>   #endif /* CONFIG_LINUX */
>>>
>>>  +static QemuMutex qemu_global_mutex;
>>>  +
>>>   int64_t max_delay;
>>>   int64_t max_advance;
>>>
>>>  @@ -776,7 +778,7 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
>>>   {
>>>       double pct;
>>>       double throttle_ratio;
>>>  - long sleeptime_ns;
>>>  + int64_t sleeptime_ns;
>>>
>>>       if (!cpu_throttle_get_percentage()) {
>>>           return;
>>>  @@ -784,11 +786,22 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
>>>
>>>       pct = (double)cpu_throttle_get_percentage()/100;
>>>       throttle_ratio = pct / (1 - pct);
>>>  - sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
>>>  -
>>>  - qemu_mutex_unlock_iothread();
>>>  - g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
>>>  - qemu_mutex_lock_iothread();
>>>  + /* Add 1ns to fix double's rounding error (like 0.9999999...) */
>>>  + sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
>>>  +
>>>  + while (sleeptime_ns >= SCALE_MS && !cpu->stop) {
>>>  + int64_t start, end;
>>>  + start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>>>  + qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
>>
>>  Paolo, Richard, please tell me what you think.
>>  I'm not sure is it correct to use qemu_cond_timedwait() here?
>>  I see that qemu_cond_timedwait()/qemu_cond_wait() and
>>  qemu_mutex_(un)lock_iothread() have a different behavior in some cases.
>>  But there are some similar using of qemu_cond_wait with halt_cond, so may be
>>  it's ok to use qemu_cond_timedwait() here too.
>
> Back in the day, Windows didn't have condition variables and making the
> implementation robust and efficient was a mess---so there was no
> qemu_cond_timedwait. Semaphores are also a wee bit more scalable, so
> qemu_sem_timedwait was introduced.
>
> Now, I don't think it's an issue to add qemu_cond_timedwait.
>

Sorry, perhaps I was not precise enough.

To fix the bug I changed the logic of the cpu_throttle_thread() function.
Before, this function called qemu_mutex_(un)lock_iothread(), which encapsulates
the work with qemu_global_mutex.

Now it calls qemu_cond_timedwait(..., &qemu_global_mutex, ...), which also
unlocks/locks qemu_global_mutex. But, in theory, the behavior of
qemu_mutex_(un)lock_iothread() may differ from simply locking/unlocking
qemu_global_mutex.

So I'm not sure whether such a change is OK.

> Thanks,
>
> Paolo
>
>>>  + sleeptime_ns / SCALE_MS);
>>>  + end = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>>>  + sleeptime_ns -= end - start;
>>>  + }
>>>  + if (sleeptime_ns >= SCALE_US && !cpu->stop) {
>>>  + qemu_mutex_unlock_iothread();
>>>  + g_usleep(sleeptime_ns / SCALE_US);
>>>  + qemu_mutex_lock_iothread();
>>>  + }
>>>       atomic_set(&cpu->throttle_thread_scheduled, 0);
>>>   }
>>>
>>>  @@ -1166,8 +1179,6 @@ static void qemu_init_sigbus(void)
>>>   }
>>>   #endif /* !CONFIG_LINUX */
>>>
>>>  -static QemuMutex qemu_global_mutex;
>>>  -
>>>   static QemuThread io_thread;
>>>
>>>   /* cpu creation */
>>>  --
>>>  2.22.0
>>
>>  Regards,
>>  Yury

Regards,
Yury
Paolo Bonzini July 15, 2019, 12:54 p.m. UTC | #4
On 15/07/19 14:36, Yury Kotov wrote:
> Sorry, perhaps I was not accurate enough.
> 
> To fix the bug I changed the logic of cpu_throttle_thread() function.
> Before this function called qemu_mutex_(un)lock_iothread which encapsulates
> work with qemu_global_mutex.
> 
> Now, this calls qemu_cond_timedwait(..., &qemu_global_mutex, ...) which also
> unlocks/locks qemu_global_mutex. But, in theory, behavior of
> qemu_mutex_(un)lock_iothread may differ from simple locking/unlocking of
> qemu_global_mutex.
> 
> So, I'm not sure is such change is ok or not.

Ah, I see.  No, it's okay.  The only difference is setting/clearing
iothread_locked, which doesn't matter here.

Paolo
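
For readers following the locking question: the helpers being compared are,
roughly, a thin wrapper around qemu_global_mutex plus a per-thread
iothread_locked flag. The sketch below paraphrases the cpus.c of that era and
is simplified (the real code goes through a pluggable BQL lock function), so
treat it as an illustration rather than the exact source:

    static __thread bool iothread_locked;      /* tracked per thread */

    void qemu_mutex_lock_iothread(void)
    {
        g_assert(!iothread_locked);
        qemu_mutex_lock(&qemu_global_mutex);   /* take the BQL */
        iothread_locked = true;
    }

    void qemu_mutex_unlock_iothread(void)
    {
        g_assert(iothread_locked);
        iothread_locked = false;
        qemu_mutex_unlock(&qemu_global_mutex); /* drop the BQL */
    }

    /*
     * qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex, ms), by
     * contrast, releases and re-acquires qemu_global_mutex inside the wait
     * and never touches iothread_locked, which is the difference Paolo
     * says does not matter for cpu_throttle_thread().
     */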

Patch

diff --git a/cpus.c b/cpus.c
index ffc57119ca..3c069cdc33 100644
--- a/cpus.c
+++ b/cpus.c
@@ -74,6 +74,8 @@ 
 
 #endif /* CONFIG_LINUX */
 
+static QemuMutex qemu_global_mutex;
+
 int64_t max_delay;
 int64_t max_advance;
 
@@ -776,7 +778,7 @@  static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 {
     double pct;
     double throttle_ratio;
-    long sleeptime_ns;
+    int64_t sleeptime_ns;
 
     if (!cpu_throttle_get_percentage()) {
         return;
@@ -784,11 +786,22 @@  static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 
     pct = (double)cpu_throttle_get_percentage()/100;
     throttle_ratio = pct / (1 - pct);
-    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
-
-    qemu_mutex_unlock_iothread();
-    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
-    qemu_mutex_lock_iothread();
+    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
+    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
+
+    while (sleeptime_ns >= SCALE_MS && !cpu->stop) {
+        int64_t start, end;
+        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
+                            sleeptime_ns / SCALE_MS);
+        end = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        sleeptime_ns -= end - start;
+    }
+    if (sleeptime_ns >= SCALE_US && !cpu->stop) {
+        qemu_mutex_unlock_iothread();
+        g_usleep(sleeptime_ns / SCALE_US);
+        qemu_mutex_lock_iothread();
+    }
     atomic_set(&cpu->throttle_thread_scheduled, 0);
 }
 
@@ -1166,8 +1179,6 @@  static void qemu_init_sigbus(void)
 }
 #endif /* !CONFIG_LINUX */
 
-static QemuMutex qemu_global_mutex;
-
 static QemuThread io_thread;
 
 /* cpu creation */
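
The behavioral point of the new loop is that a long throttle sleep becomes a
bounded condition-variable wait that re-checks cpu->stop and the remaining
time after every wakeup, so a kick from vm_stop() (which sets cpu->stop and
broadcasts halt_cond) cuts the sleep short. Below is a self-contained pthread
sketch of the same remaining-time pattern; all names in it are illustrative
and it is not QEMU code:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <time.h>

    static int64_t clock_ns(clockid_t id)
    {
        struct timespec ts;
        clock_gettime(id, &ts);
        return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    /*
     * Sleep for up to total_ns, returning early once *stop is set by another
     * thread that then signals @cond.  Must be called with @mutex held,
     * mirroring how cpu_throttle_thread() runs under the iothread mutex.
     */
    static void throttled_sleep(pthread_cond_t *cond, pthread_mutex_t *mutex,
                                const bool *stop, int64_t total_ns)
    {
        while (total_ns >= 1000000 && !*stop) {            /* >= 1 ms left */
            struct timespec deadline;
            int64_t start = clock_ns(CLOCK_MONOTONIC);
            /* pthread_cond_timedwait() takes an absolute CLOCK_REALTIME time */
            int64_t abs_ns = clock_ns(CLOCK_REALTIME) + total_ns;

            deadline.tv_sec  = abs_ns / 1000000000LL;
            deadline.tv_nsec = abs_ns % 1000000000LL;
            pthread_cond_timedwait(cond, mutex, &deadline); /* may wake early */

            /* Account for whatever time actually passed, then re-check. */
            total_ns -= clock_ns(CLOCK_MONOTONIC) - start;
        }
    }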