timerfd is a new(ish) mechanism provided in the 2.6.25+ Linux kernels that allows for a high resolution event timer signaled through a file descriptor. This patch adds an alarm timer implementation using timerfd.
timerfd has a couple advantages over dynticks. Namely, it's fd based instead of signal based which reduces the likelihood of receiving EINTR failure codes. Because we can't process timers in a signal handler, we write to a pipe() and read that in the main loop. timerfd allows us to avoid the extra write to a pipe. Finally, since timerfd is just a file descriptor, it could conceivably used to implement per-thread alarm timers. It also would integrate very easily in a glib main loop. Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c right now otherwise the refactoring would be trivial. I'll leave that for another day. Cc: Paolo Bonzini <pbonz...@redhat.com> Cc: Jan Kiszka <jan.kis...@siemens.com> Signed-off-by: Anthony Liguori <aligu...@us.ibm.com> --- Please note, this is lightly tested. Since this is such a fundamental change, I'd like to do some performance analysis before committing but wanted to share early. --- configure | 14 +++++++++ qemu-timer.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 0 deletions(-) diff --git a/configure b/configure index 8564142..8a3135b 100755 --- a/configure +++ b/configure @@ -2532,6 +2532,17 @@ if compile_prog "" "" ; then sync_file_range=yes fi +# check for timerfd +timerfd="no" +cat > $TMPC << EOF +#include <sys/timerfd.h> +int main(void) { return timerfd_create(CLOCK_REALTIME, 0); } +EOF + +if compile_prog "" "" ; then + timerfd=yes +fi + # check for linux/fiemap.h and FS_IOC_FIEMAP fiemap=no cat > $TMPC << EOF @@ -3467,6 +3478,9 @@ fi if test "$signalfd" = "yes" ; then echo "CONFIG_SIGNALFD=y" >> $config_host_mak fi +if test "$timerfd" = "yes" ; then + echo "CONFIG_TIMERFD=y" >> $config_host_mak +fi if test "$tcg_interpreter" = "yes" ; then echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak fi diff --git a/qemu-timer.c b/qemu-timer.c index c7a1551..6f7fa35 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -35,6 +35,10 @@ #include <mmsystem.h> #endif +#ifdef CONFIG_TIMERFD +#include <sys/timerfd.h> +#endif + /***********************************************************/ /* timers */ @@ -66,6 +70,9 @@ struct qemu_alarm_timer { int (*start)(struct qemu_alarm_timer *t); void (*stop)(struct qemu_alarm_timer *t); void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns); +#ifdef CONFIG_TIMERFD + int timerfd; +#endif #if defined(__linux__) timer_t timer; int fd; @@ -121,6 +128,12 @@ static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) /* TODO: MIN_TIMER_REARM_NS should be optimized */ #define MIN_TIMER_REARM_NS 250000 +#ifdef CONFIG_TIMERFD +static int timerfd_start_timer(struct qemu_alarm_timer *t); +static void timerfd_stop_timer(struct qemu_alarm_timer *t); +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, int64_t delta); +#endif + #ifdef _WIN32 static int mm_start_timer(struct qemu_alarm_timer *t); @@ -148,6 +161,10 @@ static void dynticks_rearm_timer(struct qemu_alarm_timer *t, int64_t delta); #endif /* _WIN32 */ static struct qemu_alarm_timer alarm_timers[] = { +#ifdef CONFIG_TIMERFD + {"timerfd", timerfd_start_timer, + timerfd_stop_timer, timerfd_rearm_timer}, +#endif #ifndef _WIN32 #ifdef __linux__ {"dynticks", dynticks_start_timer, @@ -476,6 +493,75 @@ static void host_alarm_handler(int host_signum) #include "compatfd.h" +static void timerfd_event(void *opaque) +{ + struct qemu_alarm_timer *t = opaque; + ssize_t len; + uint64_t count; + + len = read(t->timerfd, &count, sizeof(count)); + g_assert(len == sizeof(count)); + + t->expired = true; + t->pending = true; + + /* We already process pending timers at the end of the main loop whereas + * this function is called during I/O processing. That means we know that + * pending timers will be checked before select()'ing again which means we + * don't need to explicitly call qemu_notify_event() + */ +} + +static int timerfd_start_timer(struct qemu_alarm_timer *t) +{ + t->timerfd = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC); + if (t->timerfd == -1) { + return -errno; + } + + qemu_set_fd_handler(t->timerfd, timerfd_event, NULL, t); + + return 0; +} + +static void timerfd_stop_timer(struct qemu_alarm_timer *t) +{ + qemu_set_fd_handler(t->timerfd, NULL, NULL, NULL); + close(t->timerfd); + t->timerfd = -1; +} + +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, + int64_t nearest_delta_ns) +{ + int ret; + struct itimerspec timeout; + int64_t current_ns; + + if (nearest_delta_ns < MIN_TIMER_REARM_NS) { + nearest_delta_ns = MIN_TIMER_REARM_NS; + } + + /* check whether a timer is already running */ + ret = timerfd_gettime(t->timerfd, &timeout); + g_assert(ret == 0); + + current_ns = timeout.it_value.tv_sec * 1000000000LL; + current_ns += timeout.it_value.tv_nsec; + + if (current_ns && current_ns <= nearest_delta_ns) { + return; + } + + timeout.it_interval.tv_sec = 0; + timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */ + timeout.it_value.tv_sec = nearest_delta_ns / 1000000000; + timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000; + + ret = timerfd_settime(t->timerfd, 0, &timeout, NULL); + g_assert(ret == 0); +} + static int dynticks_start_timer(struct qemu_alarm_timer *t) { struct sigevent ev; -- 1.7.5.4