diff mbox series

qga: add command 'guest-get-cpustats'

Message ID 20220701030533.575255-1-pizhenwei@bytedance.com (mailing list archive)
State New, archived
Headers show
Series qga: add command 'guest-get-cpustats' | expand

Commit Message

zhenwei pi July 1, 2022, 3:05 a.m. UTC
A vCPU thread always reaches 100% utilization when:
- the guest uses idle=poll
- HLT VM-exits are disabled
- MWAIT is enabled

Add a new guest agent command, 'guest-get-cpustats', to get guest CPU
statistics, so that we can know the guest workload and how busy the CPU is.

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 qga/commands-posix.c | 72 ++++++++++++++++++++++++++++++++++++++++++++
 qga/commands-win32.c |  6 ++++
 qga/qapi-schema.json | 49 ++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+)

Comments

Marc-André Lureau July 1, 2022, 1:52 p.m. UTC | #1
Hi

On Fri, Jul 1, 2022 at 7:11 AM zhenwei pi <pizhenwei@bytedance.com> wrote:

> A vCPU thread always reaches 100% utilization when:
> - guest uses idle=poll
> - disable HLT vm-exit
> - enable MWAIT
>
> Add new guest agent command 'guest-get-cpustats' to get guest CPU
> statistics, we can know the guest workload and how busy the CPU is.
>
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>  qga/commands-posix.c | 72 ++++++++++++++++++++++++++++++++++++++++++++
>  qga/commands-win32.c |  6 ++++
>  qga/qapi-schema.json | 49 ++++++++++++++++++++++++++++++
>  3 files changed, 127 insertions(+)
>
> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> index 0469dc409d..2847023876 100644
> --- a/qga/commands-posix.c
> +++ b/qga/commands-posix.c
> @@ -2893,6 +2893,73 @@ GuestDiskStatsInfoList
> *qmp_guest_get_diskstats(Error **errp)
>      return guest_get_diskstats(errp);
>  }
>
> +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
> +{
> +    GuestCpuStatsList *head = NULL, **tail = &head;
> +    const char *cpustats = "/proc/stat";
> +    FILE *fp;
> +    size_t n;
> +    char *line = NULL;
> +
> +    fp = fopen(cpustats, "r");
> +    if (fp  == NULL) {
> +        error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
> +        return NULL;
> +    }
> +
> +    while (getline(&line, &n, fp) != -1) {
> +        GuestCpuStats *cpustat = NULL;
> +        int i;
> +        unsigned long user, system, idle, iowait, irq, softirq, steal,
> guest;
> +        unsigned long nice, guest_nice;
> +        char name[64];
> +
> +        i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
> +                   name, &user, &nice, &system, &idle, &iowait, &irq,
> &softirq,
> +                   &steal, &guest, &guest_nice);
> +
> +        /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */
> +        if (strncmp(name, "cpu", 3) || (name[3] == '\0')) {
>

For extra safety, check !name as well


> +            continue;
> +        }
> +
>

if i < 5, I guess you should warn and continue


> +        cpustat = g_new0(GuestCpuStats, 1);
> +        cpustat->cpu = atoi(&name[3]);
> +        cpustat->has_user = true;
> +        cpustat->user = user * 10;
>

proc(5) says that the value is given "in units of USER_HZ (1/100ths of a
second on most architectures, use sysconf(_SC_CLK_TCK) to obtain the right
value)", so we should adjust this code if we want to return correctly in ms.


> +        cpustat->has_system = true;
> +        cpustat->system = system * 10;
> +        cpustat->has_idle = true;
> +        cpustat->idle = idle * 10;
> +
> +        /* Linux version >= 2.6 */
>

That's pretty old now (2003), not sure anyone would care about that
comment, but np ;)


> +        if (i > 5) {
> +            cpustat->has_iowait = true;
> +            cpustat->iowait = iowait * 10;
> +            cpustat->has_irq = true;
> +            cpustat->irq = irq * 10;
> +            cpustat->has_softirq = true;
> +            cpustat->softirq = softirq * 10;
> +        }
> +
> +        if (i > 8) {
> +            cpustat->has_steal = true;
> +            cpustat->steal = steal * 10;
> +        }
> +
> +        if (i > 9) {
> +            cpustat->has_guest = true;
> +            cpustat->guest = guest * 10;
> +        }
> +
> +        QAPI_LIST_APPEND(tail, cpustat);
> +    }
> +
> +    free(line);
> +    fclose(fp);
> +    return head;
> +}
> +
>  #else /* defined(__linux__) */
>
>  void qmp_guest_suspend_disk(Error **errp)
> @@ -3247,6 +3314,11 @@ GuestDiskStatsInfoList
> *qmp_guest_get_diskstats(Error **errp)
>      return NULL;
>  }
>
> +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
> +{
> +    error_setg(errp, QERR_UNSUPPORTED);
> +    return NULL;
> +}
>
>  #endif /* CONFIG_FSFREEZE */
>
> diff --git a/qga/commands-win32.c b/qga/commands-win32.c
> index 36f94c0f9c..7ed7664715 100644
> --- a/qga/commands-win32.c
> +++ b/qga/commands-win32.c
> @@ -2543,3 +2543,9 @@ GuestDiskStatsInfoList
> *qmp_guest_get_diskstats(Error **errp)
>      error_setg(errp, QERR_UNSUPPORTED);
>      return NULL;
>  }
> +
> +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
> +{
> +    error_setg(errp, QERR_UNSUPPORTED);
> +    return NULL;
> +}
> diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
> index 9fa20e791b..4859c887b2 100644
> --- a/qga/qapi-schema.json
> +++ b/qga/qapi-schema.json
> @@ -1576,3 +1576,52 @@
>  { 'command': 'guest-get-diskstats',
>    'returns': ['GuestDiskStatsInfo']
>  }
> +
> +##
> +# @GuestCpuStats:
> +#
> +# Get statistics of each CPU in millisecond.
> +#
> +# @cpu: CPU index in guest OS
> +#
> +# @user: CPU time of user mode
>

"Time spent in user mode." is more understandable (from man proc(5))

Same for the other descriptions.

> +#
> +# @system: CPU time of system mode
> +#
> +# @idle: CPU time of idle state
> +#
> +# @iowait: CPU time waiting IO
> +#
> +# @irq: CPU time of hardware interrupt
> +#
> +# @softirq: CPU time of soft interrupt
> +#
> +# @steal: CPU time stolen by host
> +#
> +# @guest: CPU time of running guest mode
>

Why not report "guest_nice" as well?

Do we expect this struct to be equally meaningful for other OSes?
Otherwise, I would suggest making a "linux" variant, perhaps.

> +#
> +# Since: 7.1
> +##
> +{ 'struct': 'GuestCpuStats',
> +  'data': {'cpu': 'int',
> +           '*user': 'uint64',
> +           '*system': 'uint64',
> +           '*idle': 'uint64',
> +           '*iowait': 'uint64',
> +           '*irq': 'uint64',
> +           '*softirq': 'uint64',
> +           '*steal': 'uint64',
> +           '*guest': 'uint64'
> +           } }
> +
> +##
> +# @guest-get-cpustats:
> +#
> +# Retrieve information about CPU stats.
> +# Returns: List of CPU stats of guest.
> +#
> +# Since: 7.1
> +##
> +{ 'command': 'guest-get-cpustats',
> +  'returns': ['GuestCpuStats']
> +}
> --
> 2.20.1
>
>
>
Konstantin Kostiuk July 1, 2022, 2:23 p.m. UTC | #2
On Fri, Jul 1, 2022 at 4:53 PM Marc-André Lureau <marcandre.lureau@gmail.com>
wrote:

> Hi
>
> On Fri, Jul 1, 2022 at 7:11 AM zhenwei pi <pizhenwei@bytedance.com> wrote:
>
>> A vCPU thread always reaches 100% utilization when:
>> - guest uses idle=poll
>> - disable HLT vm-exit
>> - enable MWAIT
>>
>> Add new guest agent command 'guest-get-cpustats' to get guest CPU
>> statistics, we can know the guest workload and how busy the CPU is.
>>
>> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
>> ---
>>  qga/commands-posix.c | 72 ++++++++++++++++++++++++++++++++++++++++++++
>>  qga/commands-win32.c |  6 ++++
>>  qga/qapi-schema.json | 49 ++++++++++++++++++++++++++++++
>>  3 files changed, 127 insertions(+)
>>
>> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
>> index 0469dc409d..2847023876 100644
>> --- a/qga/commands-posix.c
>> +++ b/qga/commands-posix.c
>> @@ -2893,6 +2893,73 @@ GuestDiskStatsInfoList
>> *qmp_guest_get_diskstats(Error **errp)
>>      return guest_get_diskstats(errp);
>>  }
>>
>> +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
>> +{
>> +    GuestCpuStatsList *head = NULL, **tail = &head;
>> +    const char *cpustats = "/proc/stat";
>> +    FILE *fp;
>> +    size_t n;
>> +    char *line = NULL;
>> +
>> +    fp = fopen(cpustats, "r");
>> +    if (fp  == NULL) {
>> +        error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
>> +        return NULL;
>> +    }
>> +
>> +    while (getline(&line, &n, fp) != -1) {
>> +        GuestCpuStats *cpustat = NULL;
>> +        int i;
>> +        unsigned long user, system, idle, iowait, irq, softirq, steal,
>> guest;
>> +        unsigned long nice, guest_nice;
>> +        char name[64];
>> +
>> +        i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
>> +                   name, &user, &nice, &system, &idle, &iowait, &irq,
>> &softirq,
>> +                   &steal, &guest, &guest_nice);
>> +
>> +        /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */
>> +        if (strncmp(name, "cpu", 3) || (name[3] == '\0')) {
>>
>
> For extra safety, check !name as well
>
>
>> +            continue;
>> +        }
>> +
>>
>
> if i < 5, I guess you should warn and continue
>

Why should we skip lines where i < 5? We still have the CPU time of user and
system modes, which I think is good enough for reporting.


>
>
>> +        cpustat = g_new0(GuestCpuStats, 1);
>> +        cpustat->cpu = atoi(&name[3]);
>> +        cpustat->has_user = true;
>> +        cpustat->user = user * 10;
>>
>
> proc(5) says that the value is given "in units of USER_HZ (1/100ths of a
> second on most architectures, use sysconf(_SC_CLK_TCK) to obtain the right
> value)", so we should adjust this code if we want to return correctly in ms.
>
>
>> +        cpustat->has_system = true;
>> +        cpustat->system = system * 10;
>> +        cpustat->has_idle = true;
>> +        cpustat->idle = idle * 10;
>> +
>> +        /* Linux version >= 2.6 */
>>
>
> That's pretty old now (2003), not sure anyone would care about that
> comment, but np ;)
>
>
>> +        if (i > 5) {
>> +            cpustat->has_iowait = true;
>> +            cpustat->iowait = iowait * 10;
>> +            cpustat->has_irq = true;
>> +            cpustat->irq = irq * 10;
>> +            cpustat->has_softirq = true;
>> +            cpustat->softirq = softirq * 10;
>> +        }
>> +
>> +        if (i > 8) {
>> +            cpustat->has_steal = true;
>> +            cpustat->steal = steal * 10;
>> +        }
>> +
>> +        if (i > 9) {
>> +            cpustat->has_guest = true;
>> +            cpustat->guest = guest * 10;
>> +        }
>> +
>> +        QAPI_LIST_APPEND(tail, cpustat);
>> +    }
>> +
>> +    free(line);
>> +    fclose(fp);
>> +    return head;
>> +}
>> +
>>  #else /* defined(__linux__) */
>>
>>  void qmp_guest_suspend_disk(Error **errp)
>> @@ -3247,6 +3314,11 @@ GuestDiskStatsInfoList
>> *qmp_guest_get_diskstats(Error **errp)
>>      return NULL;
>>  }
>>
>> +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
>> +{
>> +    error_setg(errp, QERR_UNSUPPORTED);
>> +    return NULL;
>> +}
>>
>>  #endif /* CONFIG_FSFREEZE */
>>
>> diff --git a/qga/commands-win32.c b/qga/commands-win32.c
>> index 36f94c0f9c..7ed7664715 100644
>> --- a/qga/commands-win32.c
>> +++ b/qga/commands-win32.c
>> @@ -2543,3 +2543,9 @@ GuestDiskStatsInfoList
>> *qmp_guest_get_diskstats(Error **errp)
>>      error_setg(errp, QERR_UNSUPPORTED);
>>      return NULL;
>>  }
>> +
>> +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
>> +{
>> +    error_setg(errp, QERR_UNSUPPORTED);
>> +    return NULL;
>> +}
>> diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
>> index 9fa20e791b..4859c887b2 100644
>> --- a/qga/qapi-schema.json
>> +++ b/qga/qapi-schema.json
>> @@ -1576,3 +1576,52 @@
>>  { 'command': 'guest-get-diskstats',
>>    'returns': ['GuestDiskStatsInfo']
>>  }
>> +
>> +##
>> +# @GuestCpuStats:
>> +#
>> +# Get statistics of each CPU in millisecond.
>> +#
>> +# @cpu: CPU index in guest OS
>> +#
>> +# @user: CPU time of user mode
>>
>
> "Time spent in user mode." is more understandable (from man proc(5))
>
> Same for the other descriptions.
>
> +#
>> +# @system: CPU time of system mode
>> +#
>> +# @idle: CPU time of idle state
>> +#
>> +# @iowait: CPU time waiting IO
>> +#
>> +# @irq: CPU time of hardware interrupt
>> +#
>> +# @softirq: CPU time of soft interrupt
>> +#
>> +# @steal: CPU time stolen by host
>> +#
>> +# @guest: CPU time of running guest mode
>>
>
> Why not "guest_nice" ?
>
> Do we expect this struct to be equally meaningful for other OSes?
> Otherwise, I would suggest to make a "linux" variant, perhaps.
>

/proc/stat is very Linux-specific, so I think that if we implement something
similar for Windows, we will use another structure.


>
> +#
>> +# Since: 7.1
>> +##
>> +{ 'struct': 'GuestCpuStats',
>> +  'data': {'cpu': 'int',
>> +           '*user': 'uint64',
>> +           '*system': 'uint64',
>> +           '*idle': 'uint64',
>> +           '*iowait': 'uint64',
>> +           '*irq': 'uint64',
>> +           '*softirq': 'uint64',
>> +           '*steal': 'uint64',
>> +           '*guest': 'uint64'
>> +           } }
>> +
>> +##
>> +# @guest-get-cpustats:
>> +#
>> +# Retrieve information about CPU stats.
>> +# Returns: List of CPU stats of guest.
>> +#
>> +# Since: 7.1
>> +##
>> +{ 'command': 'guest-get-cpustats',
>> +  'returns': ['GuestCpuStats']
>> +}
>> --
>> 2.20.1
>>
>>
>>
>
> --
> Marc-André Lureau
>
diff mbox series

Patch

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 0469dc409d..2847023876 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -2893,6 +2893,73 @@  GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
     return guest_get_diskstats(errp);
 }
 
+/* Linux: per-CPU times from /proc/stat in ms; on open failure sets errp */
+GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
+{
+    GuestCpuStatsList *head = NULL, **tail = &head;
+    const char *cpustats = "/proc/stat";
+    /* /proc/stat counts USER_HZ ticks; assume the common 100 if unknown */
+    long clk_tck = sysconf(_SC_CLK_TCK);
+    uint64_t hz = clk_tck > 0 ? clk_tck : 100;
+    FILE *fp = fopen(cpustats, "r");
+    size_t n = 0;
+    char *line = NULL;
+
+    if (fp == NULL) {
+        error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
+        return NULL;
+    }
+
+    while (getline(&line, &n, fp) != -1) {
+        GuestCpuStats *cpustat;
+        unsigned long user, system, idle, iowait, irq, softirq, steal, guest;
+        unsigned long nice, guest_nice;
+        char name[64] = "";
+        int i = sscanf(line, "%63s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+                       name, &user, &nice, &system, &idle, &iowait, &irq,
+                       &softirq, &steal, &guest, &guest_nice);
+
+        /* need name..idle (5 fields); drop "cpu" total, keep "cpuN" only */
+        if (i < 5 || strncmp(name, "cpu", 3) || name[3] == '\0') {
+            continue;
+        }
+
+        /* ticks -> ms; 1000ULL widens first so a 32-bit long cannot wrap */
+        cpustat = g_new0(GuestCpuStats, 1);
+        cpustat->cpu = atoi(&name[3]);
+        cpustat->has_user = true;
+        cpustat->user = user * 1000ULL / hz;
+        cpustat->has_system = true;
+        cpustat->system = system * 1000ULL / hz;
+        cpustat->has_idle = true;
+        cpustat->idle = idle * 1000ULL / hz;
+
+        /* Linux >= 2.6; all three must have parsed (i counts name too) */
+        if (i > 7) {
+            cpustat->has_iowait = true;
+            cpustat->iowait = iowait * 1000ULL / hz;
+            cpustat->has_irq = true;
+            cpustat->irq = irq * 1000ULL / hz;
+            cpustat->has_softirq = true;
+            cpustat->softirq = softirq * 1000ULL / hz;
+        }
+        if (i > 8) {
+            cpustat->has_steal = true;
+            cpustat->steal = steal * 1000ULL / hz;
+        }
+        if (i > 9) {
+            cpustat->has_guest = true;
+            cpustat->guest = guest * 1000ULL / hz;
+        }
+
+        QAPI_LIST_APPEND(tail, cpustat);
+    }
+
+    free(line);
+    fclose(fp);
+    return head;
+}
+
 #else /* defined(__linux__) */
 
 void qmp_guest_suspend_disk(Error **errp)
@@ -3247,6 +3314,11 @@  GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
     return NULL;
 }
 
+GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
+{
+    error_setg(errp, QERR_UNSUPPORTED); /* stub for the non-Linux branch */
+    return NULL;
+}
 
 #endif /* CONFIG_FSFREEZE */
 
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index 36f94c0f9c..7ed7664715 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -2543,3 +2543,9 @@  GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
     error_setg(errp, QERR_UNSUPPORTED);
     return NULL;
 }
+
+GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
+{
+    error_setg(errp, QERR_UNSUPPORTED); /* not implemented for Windows */
+    return NULL;
+}
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index 9fa20e791b..4859c887b2 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -1576,3 +1576,52 @@ 
 { 'command': 'guest-get-diskstats',
   'returns': ['GuestDiskStatsInfo']
 }
+
+##
+# @GuestCpuStats:
+#
+# Time statistics of a single guest CPU; all times are in milliseconds.
+#
+# @cpu: CPU index in guest OS
+#
+# @user: Time spent in user mode
+#
+# @system: Time spent in system mode
+#
+# @idle: Time spent in the idle state
+#
+# @iowait: Time spent waiting for I/O to complete
+#
+# @irq: Time spent servicing hardware interrupts
+#
+# @softirq: Time spent servicing soft interrupts
+#
+# @steal: Time stolen by the host
+#
+# @guest: Time spent running in guest mode
+#
+# Since: 7.1
+##
+{ 'struct': 'GuestCpuStats',
+  'data': {'cpu': 'int',
+           '*user': 'uint64',
+           '*system': 'uint64',
+           '*idle': 'uint64',
+           '*iowait': 'uint64',
+           '*irq': 'uint64',
+           '*softirq': 'uint64',
+           '*steal': 'uint64',
+           '*guest': 'uint64'
+           } }
+
+##
+# @guest-get-cpustats:
+#
+# Retrieve per-CPU time statistics from the guest.
+# Returns: List of GuestCpuStats, one entry per guest CPU.
+#
+# Since: 7.1
+##
+{ 'command': 'guest-get-cpustats',
+  'returns': ['GuestCpuStats']
+}