Changelog v1 -> v2 - add RFC tag again - change semantics as follows - set event threads' priority to maxprio - set vcpu threads' priority to maxprio - 1 - isolate all the posix stuff and put it into os_prioritize() in os-posix.c/os-win32.h to avoid breaking the win32 build - introduce qemu_init_realtime(), qemu_realtime_is_enabled(), qemu_realtime_get_parameters() and struct QemuRealtimeInfo to hold the realtime options, and remove the related global variables in vl.c - add another benchmark (qpid-latency-test) result
We have some plans to migrate old enterprise/control systems which require low latency (msec order) to a kvm virtualized environment. In order to satisfy the requirements, this patch adds a realtime option to qemu: -realtime maxprio=<prio>,policy=<pol> This option changes the scheduling policy and priority to realtime ones (event threads: maxprio, vcpu threads: maxprio - 1) and mlocks all qemu and guest memory. Of course, we need more improvements to keep latency low in a qemu virtualized environment, and this is a first step. OTOH, we can meet the requirement of our first migration project with this patch. [ Note ] This version doesn't support vhost, vpnc and linux-aio. These are some basic performance results: Host : 4 core, 4GB Guest: 1 core, 512MB Benchmark: qpid-latency-test http://qpid.apache.org/ https://access.redhat.com/knowledge/docs/en-US/Red_Hat_Enterprise_MRG/2/html/Messaging_Installation_and_Configuration_Guide/qpid_latency_test.html Command: - qemu $ qemu -smp 1 -m 512 -enable-kvm -netdev tap,id=hostnet1 -device virtio-net-pci,netdev=hostnet1 -drive file=vm.img,if=virtio (-realtime maxprio=99,policy=fifo) - benchmark $ chrt -f 99 qpid-latency-test --tcp-nodelay --rate 10000 -b <server> Results: worst latency (msec) from 100 runs - no load 1. normal qemu : 17.468400 2. chrt qemu(*) : 10.019900 3. realtime qemu: 8.048370 - load (iperf, server:vm, client:other physical server) 4. normal qemu : 26.711100 5. chrt qemu : 8.485140 6. realtime qemu: 10.176700 (*) $ chrt -f -p 99 <event_thread_tid> $ chrt -f -p 98 <vcpu_thread_tid> Any comments are welcome. 
Regards, Satoru Signed-off-by: Satoru Moriya <satoru.mor...@hds.com> --- cpus.c | 17 +++++++++++++++++ include/qemu/thread.h | 4 ++++ include/sysemu/os-posix.h | 1 + include/sysemu/os-win32.h | 1 + os-posix.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++ qemu-config.c | 16 ++++++++++++++++ qemu-options.hx | 9 +++++++++ qemu-thread-posix.c | 27 ++++++++++++++++++++++++++ qemu-thread-win32.c | 13 +++++++++++++ vl.c | 33 ++++++++++++++++++++++++++++++++ 10 files changed, 169 insertions(+) diff --git a/cpus.c b/cpus.c index 4a7782a..a049970 100644 --- a/cpus.c +++ b/cpus.c @@ -734,6 +734,9 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) CPUArchState *env = arg; CPUState *cpu = ENV_GET_CPU(env); int r; + int rt_policy, rt_priority; + struct sched_param sp; + qemu_mutex_lock(&qemu_global_mutex); qemu_thread_get_self(cpu->thread); @@ -746,6 +749,20 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) exit(1); } + if (qemu_realtime_is_enabled()) { + qemu_realtime_get_parameters(&rt_policy, &rt_priority); + /* + * vcpu threads' priority must be set to event thread priority -1 + * to avoid starvation. 
+ */ + sp.sched_priority = rt_priority - 1; + r = sched_setscheduler(0, rt_policy, &sp); + if (r < 0) { + perror("Setting realtime policy failed"); + exit(1); + } + } + qemu_kvm_init_cpu_signals(env); /* signal CPU creation */ diff --git a/include/qemu/thread.h b/include/qemu/thread.h index c02404b..3d8b3d2 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -53,4 +53,8 @@ void qemu_thread_get_self(QemuThread *thread); bool qemu_thread_is_self(QemuThread *thread); void qemu_thread_exit(void *retval); +void qemu_init_realtime(int, int); +bool qemu_realtime_is_enabled(void); +void qemu_realtime_get_parameters(int *, int *); + #endif diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 7f198e4..e5995b0 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -31,6 +31,7 @@ void os_set_proc_name(const char *s); void os_setup_signal_handling(void); void os_daemonize(void); void os_setup_post(void); +void os_prioritize(const char *, int); typedef struct timeval qemu_timeval; #define qemu_gettimeofday(tp) gettimeofday(tp, NULL) diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index d0e9234..946b566 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -78,6 +78,7 @@ static inline void os_daemonize(void) {} static inline void os_setup_post(void) {} void os_set_line_buffering(void); static inline void os_set_proc_name(const char *dummy) {} +static inline void os_prioritize(const char *pol, int prio) {} #if !defined(EPROTONOSUPPORT) # define EPROTONOSUPPORT EINVAL diff --git a/os-posix.c b/os-posix.c index 5c64518..8fe0fa2 100644 --- a/os-posix.c +++ b/os-posix.c @@ -33,12 +33,14 @@ #include <pwd.h> #include <grp.h> #include <libgen.h> +#include <sched.h> /* Needed early for CONFIG_BSD etc. 
*/ #include "config-host.h" #include "sysemu/sysemu.h" #include "net/slirp.h" #include "qemu-options.h" +#include "qemu-thread.h" #ifdef CONFIG_LINUX #include <sys/prctl.h> @@ -363,3 +365,49 @@ bool is_daemonized(void) { return daemonize; } + +void os_prioritize(const char *rt_sched_policy, int max_sched_priority) +{ + int rt_pol, sys_min_prio, sys_max_prio; + + if (rt_sched_policy) { + if (!strcmp(rt_sched_policy, "rr")) { + rt_pol = SCHED_RR; + } else if (!strcmp(rt_sched_policy, "fifo")) { + rt_pol = SCHED_FIFO; + } else { + fprintf(stderr, "qemu: invalid option value '%s'\n", + rt_sched_policy); + exit(1); + } + } else { + rt_pol = SCHED_RR; + } + + sys_min_prio = sched_get_priority_min(rt_pol); + sys_max_prio = sched_get_priority_max(rt_pol); + + if (max_sched_priority < sys_min_prio + 1) { + /* + * We set event threads' priority to max_sched_priorty and + * vcpu threads' to max_sched_priority - 1 in order to avoid + * starvation. So, it must be > sys_min_prio + 1. + */ + fprintf(stderr, "qemu: invalid option maxprio=%d. It must be >= %d\n", + max_sched_priority, sys_min_prio + 1); + exit(1); + } + + if (sys_max_prio < max_sched_priority) { + fprintf(stderr, "qemu: invalid option maxprio=%d. 
It must be <= %d\n", + max_sched_priority, sys_max_prio); + exit(1); + } + + qemu_init_realtime(rt_pol, max_sched_priority); + + if (mlockall(MCL_CURRENT | MCL_FUTURE)) { + perror("mlockall"); + exit(1); + } +} diff --git a/qemu-config.c b/qemu-config.c index 2188c3e..b945d07 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -647,6 +647,21 @@ static QemuOptsList qemu_object_opts = { }, }; +static QemuOptsList qemu_realtime_opts = { + .name = "realtime", + .head = QTAILQ_HEAD_INITIALIZER(qemu_realtime_opts.head), + .desc = { + { + .name = "maxprio", + .type = QEMU_OPT_NUMBER, + }, { + .name = "policy", + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + static QemuOptsList *vm_config_groups[32] = { &qemu_drive_opts, &qemu_chardev_opts, @@ -664,6 +679,7 @@ static QemuOptsList *vm_config_groups[32] = { &qemu_sandbox_opts, &qemu_add_fd_opts, &qemu_object_opts, + &qemu_realtime_opts, NULL, }; diff --git a/qemu-options.hx b/qemu-options.hx index 9df0cde..968a20a 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2447,6 +2447,15 @@ STEXI Do not start CPU at startup (you must type 'c' in the monitor). ETEXI +DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, + "-realtime maxprio=prio[,policy=pol]\n", + QEMU_ARCH_ALL) +STEXI +@item -realtime maxprio=@var{prio}[,policy=@var{pold}] +@findex -realtime +run qemu as realtime process with priority @var{prio} and policy @var{pol}. 
+ETEXI + DEF("gdb", HAS_ARG, QEMU_OPTION_gdb, \ "-gdb dev wait for gdb connection on 'dev'\n", QEMU_ARCH_ALL) STEXI diff --git a/qemu-thread-posix.c b/qemu-thread-posix.c index 7be292e..10a97cc 100644 --- a/qemu-thread-posix.c +++ b/qemu-thread-posix.c @@ -22,6 +22,15 @@ #include <sys/time.h> #include "qemu/thread.h" +struct QemuRealtimeInfo { + bool is_realtime; + int policy; + int max_priority; +}; +typedef struct QemuRealtimeInfo QemuRealtimeInfo; + +static QemuRealtimeInfo rt_info; + static void error_exit(int err, const char *msg) { fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err)); @@ -324,3 +333,21 @@ void *qemu_thread_join(QemuThread *thread) } return ret; } + +void qemu_init_realtime(int rt_sched_policy, int max_sched_priority) +{ + rt_info.is_realtime = true; + rt_info.policy = rt_sched_policy; + rt_info.max_priority = max_sched_priority; +} + +bool qemu_realtime_is_enabled(void) +{ + return rt_info.is_realtime; +} + +void qemu_realtime_get_parameters(int *policy, int *max_priority) +{ + *policy = rt_info.policy; + *max_priority = rt_info.max_priority; +} diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c index 8037b39..3beebcf 100644 --- a/qemu-thread-win32.c +++ b/qemu-thread-win32.c @@ -369,3 +369,16 @@ bool qemu_thread_is_self(QemuThread *thread) { return GetCurrentThreadId() == thread->tid; } + +void qemu_init_realtime(int rt_sched_policy, int max_sched_priority) +{ +} + +bool qemu_realtime_is_enabled(void) +{ + return false; +} + +void qemu_realtime_get_parameters(int *policy, int *max_priority) +{ +} diff --git a/vl.c b/vl.c index e6a8d89..c310587 100644 --- a/vl.c +++ b/vl.c @@ -29,6 +29,7 @@ #include <sys/time.h> #include <zlib.h> #include "qemu/bitmap.h" +#include "qemu-thread.h" /* Needed early for CONFIG_BSD etc. 
*/ #include "config-host.h" @@ -1148,6 +1149,17 @@ static void smp_parse(const char *optarg) max_cpus = smp_cpus; } +static void configure_realtime(QemuOpts *opts) +{ + const char *pol; + int prio; + + pol = qemu_opt_get(opts, "policy"); + prio = qemu_opt_get_number(opts, "maxprio", 1); + + os_prioritize(pol, prio); +} + /***********************************************************/ /* USB devices */ @@ -1754,9 +1766,22 @@ static void main_loop(void) { bool nonblocking; int last_io = 0; + int rt_policy, rt_priority; + struct sched_param sp; #ifdef CONFIG_PROFILER int64_t ti; #endif + + if (qemu_realtime_is_enabled()) { + qemu_realtime_get_parameters(&rt_policy, &rt_priority); + + sp.sched_priority = rt_priority;; + if (sched_setscheduler(0, rt_policy, &sp) < 0) { + perror("Setting realtime policy failed"); + exit(1); + } + } + do { nonblocking = !kvm_enabled() && last_io > 0; #ifdef CONFIG_PROFILER @@ -2758,6 +2783,14 @@ int main(int argc, char **argv, char **envp) } numa_add(optarg); break; + case QEMU_OPTION_realtime: + opts = qemu_opts_parse(qemu_find_opts("realtime"), optarg, 0); + if (!opts) { + fprintf(stderr, "parse error: %s\n", optarg); + exit(1); + } + configure_realtime(opts); + break; case QEMU_OPTION_display: display_type = select_display(optarg); break; -- 1.7.11.7