Could anybody review it? On 12/21/2012 05:46 PM, Satoru Moriya wrote: > Changelog > v1 -> v2 > - add RFC tag again > - change semantics as follows > - set event threads' priority to maxprio > - set vcpu threads' priority to maxprio - 1 > - isolate all the posix stuff and put them into os_prioritize() in > os-posix.c/qemu-os-win32.h to avoid breaking win32 build > - introduce qemu_init_realtime(), qemu_realtime_is_enabled() and > qemu_realtime_get_parameters() and struct QemuRealtimeInfo to > keep realtime option and remove related global variables in vl.c > - add other benchmark(qpid-latency-test) result > > We have some plans to migrate old enterprise/control systems which > require low latency (msec order) to kvm virtualized environment. In > order to satisfy the requirements, this patch adds realtime option to qemu: > > -realtime maxprio=<prio>,policy=<pol> > > This option changes the scheduling policy and priority to realtime one > (event threads: maxprio, vcpu threads: maxprio - 1) and mlocks all qemu > and guest memory. > > Of course, we need more improvements to keep latency low in qemu > virtualized environment and this is a first step. OTOH, we can meet > the requirement of our first migration project with this patch. > > [ Note ] > This version doesn't support vhost, vpnc and linux-aio. > > These are some basic performance results: > > Host : 4 core, 4GB > Guest: 1 core, 512MB > > Benchmark: qpid-latency-test > http://qpid.apache.org/ > https://access.redhat.com/knowledge/docs/en-US/Red_Hat_Enterprise_MRG/2/html/Messaging_Installation_and_Configuration_Guide/qpid_latency_test.html > > Command: > - qemu > $ qemu -smp 1 -m 512 -enable-kvm -netdev tap,id=hostnet1 -device > virtio-net-pci,netdev=hostnet1 -drive file=vm.img,if=virtio > (-realtime maxprio=99,policy=fifo) > > - benchmark > $ chrt -f 99 qpid-latency-test --tcp-nodelay --rate 10000 -b > <server> > > Results: worst latency (msec) from 100 runs > - no load > 1. normal qemu : 17.468400 > 2. 
chrt qemu(*) : 10.019900 > 3. realtime qemu: 8.048370 > > - load (iperf, server:vm, client:other physical server) > 4. normal qemu : 26.711100 > 5. chrt qemu : 8.485140 > 6. realtime qemu: 10.176700 > > (*) $ chrt -f -p 99 <event_thread_tid> > $ chrt -f -p 98 <vcpu_thread_tid> > > Any comments are welcome. > > Regards, > Satoru > > Signed-off-by: Satoru Moriya <satoru.mor...@hds.com> > --- > cpus.c | 17 +++++++++++++++++ > include/qemu/thread.h | 4 ++++ > include/sysemu/os-posix.h | 1 + > include/sysemu/os-win32.h | 1 + > os-posix.c | 48 > +++++++++++++++++++++++++++++++++++++++++++++++ > qemu-config.c | 16 ++++++++++++++++ > qemu-options.hx | 9 +++++++++ > qemu-thread-posix.c | 27 ++++++++++++++++++++++++++ > qemu-thread-win32.c | 13 +++++++++++++ > vl.c | 33 ++++++++++++++++++++++++++++++++ > 10 files changed, 169 insertions(+) > > diff --git a/cpus.c b/cpus.c > index 4a7782a..a049970 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -734,6 +734,9 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) > CPUArchState *env = arg; > CPUState *cpu = ENV_GET_CPU(env); > int r; > + int rt_policy, rt_priority; > + struct sched_param sp; > + > > qemu_mutex_lock(&qemu_global_mutex); > qemu_thread_get_self(cpu->thread); > @@ -746,6 +749,20 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) > exit(1); > } > > + if (qemu_realtime_is_enabled()) { > + qemu_realtime_get_parameters(&rt_policy, &rt_priority); > + /* > + * vcpu threads' priority must be set to event thread priority -1 > + * to avoid starvation. 
> + */ > + sp.sched_priority = rt_priority - 1; > + r = sched_setscheduler(0, rt_policy, &sp); > + if (r < 0) { > + perror("Setting realtime policy failed"); > + exit(1); > + } > + } > + > qemu_kvm_init_cpu_signals(env); > > /* signal CPU creation */ > diff --git a/include/qemu/thread.h b/include/qemu/thread.h index > c02404b..3d8b3d2 100644 > --- a/include/qemu/thread.h > +++ b/include/qemu/thread.h > @@ -53,4 +53,8 @@ void qemu_thread_get_self(QemuThread *thread); bool > qemu_thread_is_self(QemuThread *thread); void qemu_thread_exit(void > *retval); > > +void qemu_init_realtime(int, int); > +bool qemu_realtime_is_enabled(void); > +void qemu_realtime_get_parameters(int *, int *); > + > #endif > diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h > index 7f198e4..e5995b0 100644 > --- a/include/sysemu/os-posix.h > +++ b/include/sysemu/os-posix.h > @@ -31,6 +31,7 @@ void os_set_proc_name(const char *s); void > os_setup_signal_handling(void); void os_daemonize(void); void > os_setup_post(void); > +void os_prioritize(const char *, int); > > typedef struct timeval qemu_timeval; > #define qemu_gettimeofday(tp) gettimeofday(tp, NULL) diff --git > a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index > d0e9234..946b566 100644 > --- a/include/sysemu/os-win32.h > +++ b/include/sysemu/os-win32.h > @@ -78,6 +78,7 @@ static inline void os_daemonize(void) {} static > inline void os_setup_post(void) {} void os_set_line_buffering(void); > static inline void os_set_proc_name(const char *dummy) {} > +static inline void os_prioritize(const char *pol, int prio) {} > > #if !defined(EPROTONOSUPPORT) > # define EPROTONOSUPPORT EINVAL > diff --git a/os-posix.c b/os-posix.c > index 5c64518..8fe0fa2 100644 > --- a/os-posix.c > +++ b/os-posix.c > @@ -33,12 +33,14 @@ > #include <pwd.h> > #include <grp.h> > #include <libgen.h> > +#include <sched.h> > > /* Needed early for CONFIG_BSD etc. 
*/ #include "config-host.h" > #include "sysemu/sysemu.h" > #include "net/slirp.h" > #include "qemu-options.h" > +#include "qemu-thread.h" > > #ifdef CONFIG_LINUX > #include <sys/prctl.h> > @@ -363,3 +365,49 @@ bool is_daemonized(void) { > return daemonize; > } > + > +void os_prioritize(const char *rt_sched_policy, int > +max_sched_priority) { > + int rt_pol, sys_min_prio, sys_max_prio; > + > + if (rt_sched_policy) { > + if (!strcmp(rt_sched_policy, "rr")) { > + rt_pol = SCHED_RR; > + } else if (!strcmp(rt_sched_policy, "fifo")) { > + rt_pol = SCHED_FIFO; > + } else { > + fprintf(stderr, "qemu: invalid option value '%s'\n", > + rt_sched_policy); > + exit(1); > + } > + } else { > + rt_pol = SCHED_RR; > + } > + > + sys_min_prio = sched_get_priority_min(rt_pol); > + sys_max_prio = sched_get_priority_max(rt_pol); > + > + if (max_sched_priority < sys_min_prio + 1) { > + /* > + * We set event threads' priority to max_sched_priority and > + * vcpu threads' to max_sched_priority - 1 in order to avoid > + * starvation. So, it must be >= sys_min_prio + 1. > + */ > + fprintf(stderr, "qemu: invalid option maxprio=%d. It must be >= > %d\n", > + max_sched_priority, sys_min_prio + 1); > + exit(1); > + } > + > + if (sys_max_prio < max_sched_priority) { > + fprintf(stderr, "qemu: invalid option maxprio=%d. 
It must be <= > %d\n", > + max_sched_priority, sys_max_prio); > + exit(1); > + } > + > + qemu_init_realtime(rt_pol, max_sched_priority); > + > + if (mlockall(MCL_CURRENT | MCL_FUTURE)) { > + perror("mlockall"); > + exit(1); > + } > +} > diff --git a/qemu-config.c b/qemu-config.c index 2188c3e..b945d07 > 100644 > --- a/qemu-config.c > +++ b/qemu-config.c > @@ -647,6 +647,21 @@ static QemuOptsList qemu_object_opts = { > }, > }; > > +static QemuOptsList qemu_realtime_opts = { > + .name = "realtime", > + .head = QTAILQ_HEAD_INITIALIZER(qemu_realtime_opts.head), > + .desc = { > + { > + .name = "maxprio", > + .type = QEMU_OPT_NUMBER, > + }, { > + .name = "policy", > + .type = QEMU_OPT_STRING, > + }, > + { /* end of list */ } > + }, > +}; > + > static QemuOptsList *vm_config_groups[32] = { > &qemu_drive_opts, > &qemu_chardev_opts, > @@ -664,6 +679,7 @@ static QemuOptsList *vm_config_groups[32] = { > &qemu_sandbox_opts, > &qemu_add_fd_opts, > &qemu_object_opts, > + &qemu_realtime_opts, > NULL, > }; > > diff --git a/qemu-options.hx b/qemu-options.hx index 9df0cde..968a20a > 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -2447,6 +2447,15 @@ STEXI > Do not start CPU at startup (you must type 'c' in the monitor). > ETEXI > > +DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, > + "-realtime maxprio=prio[,policy=pol]\n", > + QEMU_ARCH_ALL) > +STEXI > +@item -realtime maxprio=@var{prio}[,policy=@var{pol}] > +@findex -realtime > +run qemu as realtime process with priority @var{prio} and policy @var{pol}. 
> +ETEXI > + > DEF("gdb", HAS_ARG, QEMU_OPTION_gdb, \ > "-gdb dev wait for gdb connection on 'dev'\n", QEMU_ARCH_ALL) > STEXI > diff --git a/qemu-thread-posix.c b/qemu-thread-posix.c index > 7be292e..10a97cc 100644 > --- a/qemu-thread-posix.c > +++ b/qemu-thread-posix.c > @@ -22,6 +22,15 @@ > #include <sys/time.h> > #include "qemu/thread.h" > > +struct QemuRealtimeInfo { > + bool is_realtime; > + int policy; > + int max_priority; > +}; > +typedef struct QemuRealtimeInfo QemuRealtimeInfo; > + > +static QemuRealtimeInfo rt_info; > + > static void error_exit(int err, const char *msg) { > fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err)); @@ -324,3 > +333,21 @@ void *qemu_thread_join(QemuThread *thread) > } > return ret; > } > + > +void qemu_init_realtime(int rt_sched_policy, int max_sched_priority) > +{ > + rt_info.is_realtime = true; > + rt_info.policy = rt_sched_policy; > + rt_info.max_priority = max_sched_priority; } > + > +bool qemu_realtime_is_enabled(void) > +{ > + return rt_info.is_realtime; > +} > + > +void qemu_realtime_get_parameters(int *policy, int *max_priority) { > + *policy = rt_info.policy; > + *max_priority = rt_info.max_priority; } > diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c index > 8037b39..3beebcf 100644 > --- a/qemu-thread-win32.c > +++ b/qemu-thread-win32.c > @@ -369,3 +369,16 @@ bool qemu_thread_is_self(QemuThread *thread) { > return GetCurrentThreadId() == thread->tid; } > + > +void qemu_init_realtime(int rt_sched_policy, int max_sched_priority) > +{ } > + > +bool qemu_realtime_is_enabled(void) > +{ > + return false; > +} > + > +void qemu_realtime_get_parameters(int *policy, int *max_priority) { } > diff --git a/vl.c b/vl.c > index e6a8d89..c310587 100644 > --- a/vl.c > +++ b/vl.c > @@ -29,6 +29,7 @@ > #include <sys/time.h> > #include <zlib.h> > #include "qemu/bitmap.h" > +#include "qemu-thread.h" > > /* Needed early for CONFIG_BSD etc. 
*/ #include "config-host.h" > @@ -1148,6 +1149,17 @@ static void smp_parse(const char *optarg) > max_cpus = smp_cpus; > } > > +static void configure_realtime(QemuOpts *opts) { > + const char *pol; > + int prio; > + > + pol = qemu_opt_get(opts, "policy"); > + prio = qemu_opt_get_number(opts, "maxprio", 1); > + > + os_prioritize(pol, prio); > +} > + > /***********************************************************/ > /* USB devices */ > > @@ -1754,9 +1766,22 @@ static void main_loop(void) { > bool nonblocking; > int last_io = 0; > + int rt_policy, rt_priority; > + struct sched_param sp; > #ifdef CONFIG_PROFILER > int64_t ti; > #endif > + > + if (qemu_realtime_is_enabled()) { > + qemu_realtime_get_parameters(&rt_policy, &rt_priority); > + > + sp.sched_priority = rt_priority;; > + if (sched_setscheduler(0, rt_policy, &sp) < 0) { > + perror("Setting realtime policy failed"); > + exit(1); > + } > + } > + > do { > nonblocking = !kvm_enabled() && last_io > 0; #ifdef > CONFIG_PROFILER @@ -2758,6 +2783,14 @@ int main(int argc, char **argv, > char **envp) > } > numa_add(optarg); > break; > + case QEMU_OPTION_realtime: > + opts = qemu_opts_parse(qemu_find_opts("realtime"), optarg, > 0); > + if (!opts) { > + fprintf(stderr, "parse error: %s\n", optarg); > + exit(1); > + } > + configure_realtime(opts); > + break; > case QEMU_OPTION_display: > display_type = select_display(optarg); > break; >