Hi,

On Fri, Jul 1, 2022 at 7:11 AM zhenwei pi <pizhen...@bytedance.com> wrote:
> A vCPU thread always reaches 100% utilization when: > - guest uses idle=poll > - disable HLT vm-exit > - enable MWAIT > > Add new guest agent command 'guest-get-cpustats' to get guest CPU > statistics, we can know the guest workload and how busy the CPU is. > > Signed-off-by: zhenwei pi <pizhen...@bytedance.com> > --- > qga/commands-posix.c | 72 ++++++++++++++++++++++++++++++++++++++++++++ > qga/commands-win32.c | 6 ++++ > qga/qapi-schema.json | 49 ++++++++++++++++++++++++++++++ > 3 files changed, 127 insertions(+) > > diff --git a/qga/commands-posix.c b/qga/commands-posix.c > index 0469dc409d..2847023876 100644 > --- a/qga/commands-posix.c > +++ b/qga/commands-posix.c > @@ -2893,6 +2893,73 @@ GuestDiskStatsInfoList > *qmp_guest_get_diskstats(Error **errp) > return guest_get_diskstats(errp); > } > > +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp) > +{ > + GuestCpuStatsList *head = NULL, **tail = &head; > + const char *cpustats = "/proc/stat"; > + FILE *fp; > + size_t n; > + char *line = NULL; > + > + fp = fopen(cpustats, "r"); > + if (fp == NULL) { > + error_setg_errno(errp, errno, "open(\"%s\")", cpustats); > + return NULL; > + } > + > + while (getline(&line, &n, fp) != -1) { > + GuestCpuStats *cpustat = NULL; > + int i; > + unsigned long user, system, idle, iowait, irq, softirq, steal, > guest; > + unsigned long nice, guest_nice; > + char name[64]; > + > + i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", > + name, &user, &nice, &system, &idle, &iowait, &irq, > &softirq, > + &steal, &guest, &guest_nice); > + > + /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." 
only */ > + if (strncmp(name, "cpu", 3) || (name[3] == '\0')) { > For extra safety, check !name as well > + continue; > + } > + > If i < 5 (i.e. sscanf() could not parse the name plus the mandatory user, nice, system and idle fields), I guess you should warn and continue. > + cpustat = g_new0(GuestCpuStats, 1); > + cpustat->cpu = atoi(&name[3]); > + cpustat->has_user = true; > + cpustat->user = user * 10; > proc(5) says that the value is given "in units of USER_HZ (1/100ths of a second on most architectures, use sysconf(_SC_CLK_TCK) to obtain the right value)", so we should adjust this code, rather than hard-coding "* 10", if we want to return the values correctly in ms. > + cpustat->has_system = true; > + cpustat->system = system * 10; > + cpustat->has_idle = true; > + cpustat->idle = idle * 10; > + > + /* Linux version >= 2.6 */ > That's pretty old now (2003); I'm not sure anyone would care about that comment, but no problem ;) > + if (i > 5) { > + cpustat->has_iowait = true; > + cpustat->iowait = iowait * 10; > + cpustat->has_irq = true; > + cpustat->irq = irq * 10; > + cpustat->has_softirq = true; > + cpustat->softirq = softirq * 10; > + } > + > + if (i > 8) { > + cpustat->has_steal = true; > + cpustat->steal = steal * 10; > + } > + > + if (i > 9) { > + cpustat->has_guest = true; > + cpustat->guest = guest * 10; > + } > + > + QAPI_LIST_APPEND(tail, cpustat); > + } > + > + free(line); > + fclose(fp); > + return head; > +} > + > #else /* defined(__linux__) */ > > void qmp_guest_suspend_disk(Error **errp) > @@ -3247,6 +3314,11 @@ GuestDiskStatsInfoList > *qmp_guest_get_diskstats(Error **errp) > return NULL; > } > > +GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp) > +{ > + error_setg(errp, QERR_UNSUPPORTED); > + return NULL; > +} > > #endif /* CONFIG_FSFREEZE */ > > diff --git a/qga/commands-win32.c b/qga/commands-win32.c > index 36f94c0f9c..7ed7664715 100644 > --- a/qga/commands-win32.c > +++ b/qga/commands-win32.c > @@ -2543,3 +2543,9 @@ GuestDiskStatsInfoList > *qmp_guest_get_diskstats(Error **errp) > error_setg(errp, QERR_UNSUPPORTED); > return NULL; > } > + > +GuestCpuStatsList 
*qmp_guest_get_cpustats(Error **errp) > +{ > + error_setg(errp, QERR_UNSUPPORTED); > + return NULL; > +} > diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json > index 9fa20e791b..4859c887b2 100644 > --- a/qga/qapi-schema.json > +++ b/qga/qapi-schema.json > @@ -1576,3 +1576,52 @@ > { 'command': 'guest-get-diskstats', > 'returns': ['GuestDiskStatsInfo'] > } > + > +## > +# @GuestCpuStats: > +# > +# Get statistics of each CPU in millisecond. > +# > +# @cpu: CPU index in guest OS > +# > +# @user: CPU time of user mode > "Time spent in user mode." is more understandable (from man proc(5)). Same for the other descriptions. > +# > +# @system: CPU time of system mode > +# > +# @idle: CPU time of idle state > +# > +# @iowait: CPU time waiting IO > +# > +# @irq: CPU time of hardware interrupt > +# > +# @softirq: CPU time of soft interrupt > +# > +# @steal: CPU time stolen by host > +# > +# @guest: CPU time of running guest mode > Why is "guest_nice" not exposed as well? Do we expect this struct to be equally meaningful for other OSes? Otherwise, I would suggest making a "linux" variant, perhaps. > +# > +# Since: 7.1 > +## > +{ 'struct': 'GuestCpuStats', > + 'data': {'cpu': 'int', > + '*user': 'uint64', > + '*system': 'uint64', > + '*idle': 'uint64', > + '*iowait': 'uint64', > + '*irq': 'uint64', > + '*softirq': 'uint64', > + '*steal': 'uint64', > + '*guest': 'uint64' > + } } > + > +## > +# @guest-get-cpustats: > +# > +# Retrieve information about CPU stats. > +# Returns: List of CPU stats of guest. > +# > +# Since: 7.1 > +## > +{ 'command': 'guest-get-cpustats', > + 'returns': ['GuestCpuStats'] > +} > -- > 2.20.1 > > > -- Marc-André Lureau