Currently, timers run on iothread inside QBL, this limits the usage of timers in some case, e.g. virtio-blk-dataplane. In order to run timers on private thread based on different clocksource, we arm each AioContext with three timer lists in according to three clocksource (QemuClock).
A little later, we will run timers in aio_poll. Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com> ------ issue to fix --- Note: before this patch, there should be another one to fix the race issue by qemu_mod_timer() and _run_timers(). --- async.c | 9 ++++++++ include/block/aio.h | 13 +++++++++++ include/qemu/timer.h | 20 ++++++++++++++++ qemu-timer.c | 65 +++++++++++++++++++++++++++++++--------------------- 4 files changed, 81 insertions(+), 26 deletions(-) diff --git a/async.c b/async.c index ba4072c..7e2340e 100644 --- a/async.c +++ b/async.c @@ -201,11 +201,15 @@ static void aio_ctx_finalize(GSource *source) { AioContext *ctx = (AioContext *) source; + int i; thread_pool_free(ctx->thread_pool); aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL); event_notifier_cleanup(&ctx->notifier); g_array_free(ctx->pollfds, TRUE); + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { + timer_list_finalize(&ctx->timer_list[i]); + } } static GSourceFuncs aio_source_funcs = { @@ -237,6 +241,8 @@ void aio_notify(AioContext *ctx) AioContext *aio_context_new(void) { AioContext *ctx; + int i; + ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); ctx->thread_pool = NULL; @@ -245,6 +251,9 @@ AioContext *aio_context_new(void) aio_set_event_notifier(ctx, &ctx->notifier, (EventNotifierHandler *) event_notifier_test_and_clear, NULL); + for (i = 0; i < QEMU_CLOCK_MAXCNT; i++) { + timer_list_init(&ctx->timer_list[i]); + } return ctx; } diff --git a/include/block/aio.h b/include/block/aio.h index 04598b2..cf41b42 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -43,6 +43,18 @@ typedef struct AioHandler AioHandler; typedef void QEMUBHFunc(void *opaque); typedef void IOHandler(void *opaque); +/* Related timer with AioContext */ +typedef struct QEMUTimer QEMUTimer; +#define QEMU_CLOCK_MAXCNT 3 + +typedef struct TimerList { + QEMUTimer *active_timers; + QemuMutex active_timers_lock; +} TimerList; + +void timer_list_init(TimerList *tlist); +void timer_list_finalize(TimerList *tlist); + typedef struct AioContext { GSource source; @@ -73,6 +85,7 @@ typedef struct AioContext { /* Thread pool for performing work and receiving completion callbacks */ struct ThreadPool *thread_pool; + TimerList timer_list[QEMU_CLOCK_MAXCNT]; } AioContext; /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 9dd206c..3e5016b 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -33,9 +33,14 @@ extern QEMUClock *vm_clock; extern QEMUClock *host_clock; int64_t qemu_get_clock_ns(QEMUClock *clock); +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. + * So we only count the timers on qemu_aio_context. + */ int64_t qemu_clock_has_timers(QEMUClock *clock); int64_t qemu_clock_expired(QEMUClock *clock); int64_t qemu_clock_deadline(QEMUClock *clock); + void qemu_clock_enable(QEMUClock *clock, bool enabled); void qemu_clock_warp(QEMUClock *clock); @@ -45,6 +50,9 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock, QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, QEMUTimerCB *cb, void *opaque); +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque, AioContext *ctx); + void qemu_free_timer(QEMUTimer *ts); void qemu_del_timer(QEMUTimer *ts); void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time); @@ -75,6 +83,18 @@ static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, return qemu_new_timer(clock, SCALE_MS, cb, opaque); } +static inline QEMUTimer *aioctx_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb, + void *opaque, AioContext *ctx) +{ + return aioctx_new_timer(clock, SCALE_NS, cb, opaque, ctx); +} + +static inline QEMUTimer *aioctx_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, + void *opaque, AioContext *ctx) +{ + return aioctx_new_timer(clock, SCALE_MS, cb, opaque, ctx); +} + static inline int64_t qemu_get_clock_ms(QEMUClock *clock) { return qemu_get_clock_ns(clock) / SCALE_MS; diff --git a/qemu-timer.c b/qemu-timer.c index d941a83..f15c3e6 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -45,14 +45,6 @@ #define QEMU_CLOCK_REALTIME 0 #define QEMU_CLOCK_VIRTUAL 1 #define QEMU_CLOCK_HOST 2 -#define QEMU_CLOCK_MAXCNT 3 - -typedef struct TimerList { - QEMUTimer *active_timers; - QemuMutex active_timers_lock; -} TimerList; - -static TimerList timer_list[QEMU_CLOCK_MAXCNT]; struct QEMUClock { NotifierList reset_notifiers; @@ -72,7 +64,9 @@ struct QEMUClock { struct QEMUTimer { int64_t expire_time; /* in nanoseconds */ + /* quick link to AioContext timer list */ TimerList *list; + AioContext *ctx; QEMUTimerCB *cb; void *opaque; QEMUTimer *next; @@ -100,11 +94,12 @@ struct qemu_alarm_timer { static struct qemu_alarm_timer *alarm_timer; -static TimerList *clock_to_timerlist(QEMUClock *clock) +static TimerList *clock_to_timerlist(QEMUClock *clock, AioContext *ctx) { int type = clock->type; - return &timer_list[type]; + assert(ctx); + return &ctx->timer_list[type]; } static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) @@ -112,7 +107,8 @@ static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) return timer_head && (timer_head->expire_time <= current_time); } -static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) +static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta, + AioContext *ctx) { int64_t expire_time, next; bool has_timer = false; @@ -122,7 +118,7 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) return delta; } - tlist = clock_to_timerlist(clock); + tlist = clock_to_timerlist(clock, ctx); qemu_mutex_lock(&tlist->active_timers_lock); if (tlist->active_timers) { has_timer = true; @@ -140,12 +136,13 @@ static int64_t qemu_next_clock_deadline(QEMUClock *clock, int64_t delta) static int64_t qemu_next_alarm_deadline(void) { int64_t delta = INT64_MAX; + AioContext *ctx = *tls_get_thread_aio_context(); if (!use_icount) { - delta = qemu_next_clock_deadline(vm_clock, delta); + delta = qemu_next_clock_deadline(vm_clock, delta, ctx); } - delta = qemu_next_clock_deadline(host_clock, delta); - return qemu_next_clock_deadline(rt_clock, delta); + delta = qemu_next_clock_deadline(host_clock, delta, ctx); + return qemu_next_clock_deadline(rt_clock, delta, ctx); } static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) @@ -267,16 +264,21 @@ QEMUClock *rt_clock; QEMUClock *vm_clock; QEMUClock *host_clock; -static void timer_list_init(TimerList *tlist) +void timer_list_init(TimerList *tlist) { qemu_mutex_init(&tlist->active_timers_lock); tlist->active_timers = NULL; } +void timer_list_finalize(TimerList *tlist) +{ + qemu_mutex_destroy(&tlist->active_timers_lock); + assert(!tlist->active_timers); +} + static QEMUClock *qemu_new_clock(int type) { QEMUClock *clock; - TimerList *tlist; clock = g_malloc0(sizeof(QEMUClock)); clock->type = type; @@ -286,8 +288,6 @@ static QEMUClock *qemu_new_clock(int type) qemu_cond_init(&clock->wait_using); qemu_mutex_init(&clock->lock); notifier_list_init(&clock->reset_notifiers); - tlist = clock_to_timerlist(clock); - timer_list_init(tlist); return clock; } @@ -308,10 +308,14 @@ void qemu_clock_enable(QEMUClock *clock, bool enabled) } } +/* qemu_clock_has_timers, qemu_clock_expired, qemu_clock_deadline + * run In tcg icount mode. There is only one AioContext i.e. qemu_aio_context. + * So we only count the timers on qemu_aio_context. +*/ int64_t qemu_clock_has_timers(QEMUClock *clock) { bool has_timers; - TimerList *tlist = clock_to_timerlist(clock); + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); qemu_mutex_lock(&tlist->active_timers_lock); has_timers = !!tlist->active_timers; @@ -323,7 +327,7 @@ int64_t qemu_clock_expired(QEMUClock *clock) { bool has_timers; int64_t expire_time; - TimerList *tlist = clock_to_timerlist(clock); + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); qemu_mutex_lock(&tlist->active_timers_lock); has_timers = tlist->active_timers; @@ -339,7 +343,7 @@ int64_t qemu_clock_deadline(QEMUClock *clock) int64_t delta = INT32_MAX; bool has_timers; int64_t expire_time; - TimerList *tlist = clock_to_timerlist(clock); + TimerList *tlist = clock_to_timerlist(clock, qemu_get_aio_context()); qemu_mutex_lock(&tlist->active_timers_lock); has_timers = tlist->active_timers; @@ -355,19 +359,26 @@ int64_t qemu_clock_deadline(QEMUClock *clock) return delta; } -QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, - QEMUTimerCB *cb, void *opaque) +QEMUTimer *aioctx_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque, AioContext *ctx) { QEMUTimer *ts; ts = g_malloc0(sizeof(QEMUTimer)); - ts->list = clock_to_timerlist(clock); + ts->list = clock_to_timerlist(clock, ctx); ts->cb = cb; ts->opaque = opaque; ts->scale = scale; + ts->ctx = ctx; return ts; } +QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque) +{ + return aioctx_new_timer(clock, scale, cb, opaque, qemu_get_aio_context()); +} + void qemu_free_timer(QEMUTimer *ts) { g_free(ts); @@ -457,6 +468,7 @@ void qemu_run_timers(QEMUClock *clock) QEMUTimer *ts; int64_t current_time; TimerList *tlist; + AioContext *ctx; atomic_inc(&clock->using); if (unlikely(!clock->enabled)) { @@ -465,7 +477,8 @@ void qemu_run_timers(QEMUClock *clock) current_time = qemu_get_clock_ns(clock); - tlist = clock_to_timerlist(clock); + ctx = *tls_get_thread_aio_context(); + tlist = clock_to_timerlist(clock, ctx); for(;;) { qemu_mutex_lock(&tlist->active_timers_lock); -- 1.8.1.4