On Tue, Sep 18, 2012 at 4:37 PM, Anthony Liguori <aligu...@us.ibm.com>wrote:
> timerfd is a new(ish) mechanism provided in the 2.6.25+ Linux kernels that > allows for a high resolution event timer signaled through a file > descriptor. > This patch adds an alarm timer implementation using timerfd. > > timerfd has a couple advantages over dynticks. Namely, it's fd based > instead > of signal based which reduces the likelihood of receiving EINTR failure > codes. > > Because we can't process timers in a signal handler, we write to a pipe() > and > read that in the main loop. timerfd allows us to avoid the extra write to > a > pipe. > > Finally, since timerfd is just a file descriptor, it could conceivably > used to > implement per-thread alarm timers. It also would integrate very easily in > a > glib main loop. > > Unfortunately, there's a lot of Windows code in qemu-timer.c and > main-loop.c > right now otherwise the refactoring would be trivial. I'll leave that for > another day. > > Cc: Paolo Bonzini <pbonz...@redhat.com> > Cc: Jan Kiszka <jan.kis...@siemens.com> > Signed-off-by: Anthony Liguori <aligu...@us.ibm.com> > --- > Please note, this is lightly tested. Since this is such a fundamental > change, > I'd like to do some performance analysis before committing but wanted to > share > early. > --- > configure | 14 +++++++++ > qemu-timer.c | 86 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 100 insertions(+), 0 deletions(-) > > diff --git a/configure b/configure > index 8564142..8a3135b 100755 > --- a/configure > +++ b/configure > @@ -2532,6 +2532,17 @@ if compile_prog "" "" ; then > sync_file_range=yes > fi > > +# check for timerfd > +timerfd="no" > +cat > $TMPC << EOF > +#include <sys/timerfd.h> > +int main(void) { return timerfd_create(CLOCK_REALTIME, 0); } > +EOF > + > +if compile_prog "" "" ; then > + timerfd=yes > +fi > + > # check for linux/fiemap.h and FS_IOC_FIEMAP > fiemap=no > cat > $TMPC << EOF > @@ -3467,6 +3478,9 @@ fi > if test "$signalfd" = "yes" ; then > echo "CONFIG_SIGNALFD=y" >> $config_host_mak > fi > +if test "$timerfd" = "yes" ; then > + echo "CONFIG_TIMERFD=y" >> $config_host_mak > +fi > if test "$tcg_interpreter" = "yes" ; then > echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak > fi > diff --git a/qemu-timer.c b/qemu-timer.c > index c7a1551..6f7fa35 100644 > --- a/qemu-timer.c > +++ b/qemu-timer.c > @@ -35,6 +35,10 @@ > #include <mmsystem.h> > #endif > > +#ifdef CONFIG_TIMERFD > +#include <sys/timerfd.h> > +#endif > + > /***********************************************************/ > /* timers */ > > @@ -66,6 +70,9 @@ struct qemu_alarm_timer { > int (*start)(struct qemu_alarm_timer *t); > void (*stop)(struct qemu_alarm_timer *t); > void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns); > +#ifdef CONFIG_TIMERFD > + int timerfd; > +#endif > #if defined(__linux__) > timer_t timer; > int fd; > @@ -121,6 +128,12 @@ static void qemu_rearm_alarm_timer(struct > qemu_alarm_timer *t) > /* TODO: MIN_TIMER_REARM_NS should be optimized */ > #define MIN_TIMER_REARM_NS 250000 > > +#ifdef CONFIG_TIMERFD > +static int timerfd_start_timer(struct qemu_alarm_timer *t); > +static void timerfd_stop_timer(struct qemu_alarm_timer *t); > +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, int64_t > delta); > +#endif > + > #ifdef _WIN32 > > static int mm_start_timer(struct qemu_alarm_timer *t); > @@ -148,6 +161,10 @@ static void dynticks_rearm_timer(struct > qemu_alarm_timer *t, int64_t delta); > #endif /* _WIN32 */ > > static struct qemu_alarm_timer alarm_timers[] = { > +#ifdef CONFIG_TIMERFD > + {"timerfd", timerfd_start_timer, > + timerfd_stop_timer, timerfd_rearm_timer}, > +#endif > #ifndef _WIN32 > #ifdef __linux__ > {"dynticks", dynticks_start_timer, > @@ -476,6 +493,75 @@ static void host_alarm_handler(int host_signum) > > #include "compatfd.h" > > +static void timerfd_event(void *opaque) > +{ > + struct qemu_alarm_timer *t = opaque; > + ssize_t len; > + uint64_t count; > + > + len = read(t->timerfd, &count, sizeof(count)); > + g_assert(len == sizeof(count)); > + > + t->expired = true; > + t->pending = true; > + > + /* We already process pending timers at the end of the main loop > whereas > + * this function is called during I/O processing. That means we know > that > + * pending timers will be checked before select()'ing again which > means we > + * don't need to explicitly call qemu_notify_event() > + */ > +} > + > +static int timerfd_start_timer(struct qemu_alarm_timer *t) > +{ > + t->timerfd = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC); > + if (t->timerfd == -1) { > + return -errno; > + } > + > + qemu_set_fd_handler(t->timerfd, timerfd_event, NULL, t); > + > + return 0; > +} > + > +static void timerfd_stop_timer(struct qemu_alarm_timer *t) > +{ > + qemu_set_fd_handler(t->timerfd, NULL, NULL, NULL); > + close(t->timerfd); > + t->timerfd = -1; > +} > + > +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, > + int64_t nearest_delta_ns) > +{ > + int ret; > + struct itimerspec timeout; > + int64_t current_ns; > + > + if (nearest_delta_ns < MIN_TIMER_REARM_NS) { > + nearest_delta_ns = MIN_TIMER_REARM_NS; > + } > It has been a few months, but IIRC, this pattern can lead to spurious timeouts if these methods are not used correctly. If folks are interested, I can spend some time to dig up my past work on this to explain more. > + /* check whether a timer is already running */ > + ret = timerfd_gettime(t->timerfd, &timeout); > + g_assert(ret == 0); > + > + current_ns = timeout.it_value.tv_sec * 1000000000LL; > + current_ns += timeout.it_value.tv_nsec; > + > + if (current_ns && current_ns <= nearest_delta_ns) { > + return; > + } > + > + timeout.it_interval.tv_sec = 0; > + timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */ > + timeout.it_value.tv_sec = nearest_delta_ns / 1000000000; > + timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000; > + > + ret = timerfd_settime(t->timerfd, 0, &timeout, NULL); > + g_assert(ret == 0); > +} > + > static int dynticks_start_timer(struct qemu_alarm_timer *t) > { > struct sigevent ev; > -- > 1.7.5.4 > > >