On Fri, 04/15 13:32, Paolo Bonzini wrote:
> Add two implementations of the same benchmark as the previous patch,
> but using pthreads.  One uses a normal QemuMutex, the other is Linux
> only and implements a fair mutex based on MCS locks and futexes.
> This shows that the slower performance of the 5-thread case is due to
> the fairness of CoMutex, rather than to coroutines.  If fairness does
> not matter, as is the case with two threads, CoMutex can actually be
> faster than pthreads.
>
> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
> ---
>  tests/test-aio-multithread.c | 152 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 152 insertions(+)
>
> diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
> index 18b3548..d7bc1bf 100644
> --- a/tests/test-aio-multithread.c
> +++ b/tests/test-aio-multithread.c
> @@ -279,6 +279,150 @@ static void test_multi_co_mutex_2_30(void)
>      test_multi_co_mutex(2, 30);
>  }
>
> +/* Same test with fair mutexes, for performance comparison. */
> +
> +#ifdef CONFIG_LINUX
> +#include "qemu/futex.h"

Do we have qemu/futex.h?
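I don't see it in the tree or earlier in this series, so I assume it
hoists the futex_wait()/futex_wake() helpers somewhere shared.  For
reference, a minimal sketch of what I would expect it to provide, based
only on the call sites in this patch (the wrapper names and signatures
are my guess from those call sites, not the actual header), wrapping the
raw futex(2) syscall:

#include <errno.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static inline int futex(void *uaddr, int op, int val,
                        const struct timespec *timeout,
                        void *uaddr2, int val3)
{
    return syscall(__NR_futex, uaddr, op, val, timeout, uaddr2, val3);
}

/* Sleep as long as *addr == val; return at once if it already differs. */
static inline void futex_wait(void *addr, unsigned val)
{
    while (futex(addr, FUTEX_WAIT, (int)val, NULL, NULL, 0)) {
        switch (errno) {
        case EWOULDBLOCK:
            return;   /* value changed before we went to sleep */
        case EINTR:
            break;    /* interrupted by a signal, retry */
        default:
            abort();
        }
    }
}

/* Wake up to n threads waiting on addr. */
static inline void futex_wake(void *addr, int n)
{
    futex(addr, FUTEX_WAKE, n, NULL, NULL, 0);
}

FUTEX_WAIT failing with EWOULDBLOCK when the value has already changed
is what makes the wake-before-wait race in mcs_mutex_lock() safe: if the
unlocker clears ->locked before the waiter sleeps, the wait returns
immediately instead of missing the wakeup.
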
> +
> +/* The nodes for the mutex reside in this structure (on which we try to avoid
> + * false sharing).  The head of the mutex is in the "mutex_head" variable.
> + */
> +static struct {
> +    int next, locked;
> +    int padding[14];
> +} nodes[NUM_CONTEXTS] __attribute__((__aligned__(64)));
> +
> +static int mutex_head = -1;
> +
> +static void mcs_mutex_lock(void)
> +{
> +    int prev;
> +
> +    nodes[id].next = -1;
> +    nodes[id].locked = 1;
> +    prev = atomic_xchg(&mutex_head, id);
> +    if (prev != -1) {
> +        atomic_set(&nodes[prev].next, id);
> +        futex_wait(&nodes[id].locked, 1);
> +    }
> +}
> +
> +static void mcs_mutex_unlock(void)
> +{
> +    int next;
> +    if (nodes[id].next == -1) {
> +        if (atomic_read(&mutex_head) == id &&
> +            atomic_cmpxchg(&mutex_head, id, -1) == id) {
> +            /* Last item in the list, exit. */
> +            return;
> +        }
> +        while (atomic_read(&nodes[id].next) == -1) {
> +            /* Spin... */
> +        }
> +    }
> +
> +    /* Wake up the next in line. */
> +    next = nodes[id].next;
> +    nodes[next].locked = 0;
> +    futex_wake(&nodes[next].locked, 1);
> +}
> +
> +static void test_multi_fair_mutex_entry(void *opaque)
> +{
> +    while (!atomic_mb_read(&now_stopping)) {
> +        mcs_mutex_lock();
> +        counter++;
> +        mcs_mutex_unlock();
> +        atomic_inc(&atomic_counter);
> +    }
> +
> +}
> +
> +static void test_multi_fair_mutex(int threads, int seconds)
> +{
> +    int i;
> +
> +    assert(mutex_head == -1);
> +    counter = 0;
> +    atomic_counter = 0;
> +    now_stopping = false;
> +
> +    create_aio_contexts();
> +    assert(threads <= NUM_CONTEXTS);
> +    for (i = 0; i < threads; i++) {
> +        Coroutine *co1 = qemu_coroutine_create(test_multi_fair_mutex_entry);
> +        aio_co_schedule(ctx[i], co1);
> +    }
> +
> +    g_usleep(seconds * 1000000);
> +
> +    atomic_mb_set(&now_stopping, true);
> +    join_aio_contexts();
> +    g_test_message("%d iterations/second\n", counter / seconds);
> +    g_assert_cmpint(counter, ==, atomic_counter);
> +}
> +
> +static void test_multi_fair_mutex_1(void)
> +{
> +    test_multi_fair_mutex(NUM_CONTEXTS, 1);
> +}
> +
> +static void test_multi_fair_mutex_10(void)
> +{
> +    test_multi_fair_mutex(NUM_CONTEXTS, 10);
> +}
> +#endif
> +
> +/* Same test with pthread mutexes, for performance comparison and
> + * portability. */
> +
> +static QemuMutex mutex;
> +
> +static void test_multi_mutex_entry(void *opaque)
> +{
> +    while (!atomic_mb_read(&now_stopping)) {
> +        qemu_mutex_lock(&mutex);
> +        counter++;
> +        qemu_mutex_unlock(&mutex);
> +        atomic_inc(&atomic_counter);
> +    }
> +
> +}
> +
> +static void test_multi_mutex(int threads, int seconds)
> +{
> +    int i;
> +
> +    qemu_mutex_init(&mutex);
> +    counter = 0;
> +    atomic_counter = 0;
> +    now_stopping = false;
> +
> +    create_aio_contexts();
> +    assert(threads <= NUM_CONTEXTS);
> +    for (i = 0; i < threads; i++) {
> +        Coroutine *co1 = qemu_coroutine_create(test_multi_mutex_entry);
> +        aio_co_schedule(ctx[i], co1);
> +    }
> +
> +    g_usleep(seconds * 1000000);
> +
> +    atomic_mb_set(&now_stopping, true);
> +    join_aio_contexts();
> +    g_test_message("%d iterations/second\n", counter / seconds);
> +    g_assert_cmpint(counter, ==, atomic_counter);
> +}
> +
> +static void test_multi_mutex_1(void)
> +{
> +    test_multi_mutex(NUM_CONTEXTS, 1);
> +}
> +
> +static void test_multi_mutex_10(void)
> +{
> +    test_multi_mutex(NUM_CONTEXTS, 10);
> +}
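
For the archives, since the unlock path above took me a second read: the
MCS algorithm, restated with pointers and C11 atomics (a sketch of my
own, not code from this patch; the names mcs_node/mcs_lock/mcs_unlock
are made up).  The patch uses array indices with a -1 sentinel instead
of pointers because a futex can only wait on an int, and it turns the
acquire-side spin into futex_wait()/futex_wake():

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct mcs_node {
    _Atomic(struct mcs_node *) next;   /* successor in the wait queue */
    atomic_bool locked;                /* true while we must wait */
};

static _Atomic(struct mcs_node *) mcs_tail;   /* last waiter, or NULL */

static void mcs_lock(struct mcs_node *me)
{
    struct mcs_node *prev;

    atomic_store(&me->next, NULL);
    atomic_store(&me->locked, true);
    /* Atomically append ourselves to the queue. */
    prev = atomic_exchange(&mcs_tail, me);
    if (prev) {
        /* Someone is ahead of us: link behind them and wait for the
         * handoff.  This spin is what the patch replaces with
         * futex_wait(). */
        atomic_store(&prev->next, me);
        while (atomic_load(&me->locked)) {
            /* spin */
        }
    }
}

static void mcs_unlock(struct mcs_node *me)
{
    struct mcs_node *next = atomic_load(&me->next);

    if (!next) {
        /* No visible successor; if we are still the tail, the queue is
         * empty and there is nobody to hand off to. */
        struct mcs_node *expected = me;
        if (atomic_compare_exchange_strong(&mcs_tail, &expected, NULL)) {
            return;
        }
        /* A successor did the exchange in mcs_lock() but has not stored
         * its next pointer yet; wait for the link to appear. */
        while (!(next = atomic_load(&me->next))) {
            /* spin */
        }
    }
    /* FIFO handoff to the oldest waiter; the patch pairs this store
     * with futex_wake(). */
    atomic_store(&next->locked, false);
}

This also makes the fairness claim in the commit message explicit: the
exchange on the tail orders the waiters, and unlock always hands the
lock to the oldest one.
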
> +
>  /* End of tests. */
>
>  int main(int argc, char **argv)
> @@ -291,10 +435,18 @@ int main(int argc, char **argv)
>          g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
>          g_test_add_func("/aio/multi/mutex", test_multi_co_mutex_1);
>          g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
> +#ifdef CONFIG_LINUX
> +        g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_1);
> +#endif
> +        g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_1);
>      } else {
>          g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
>          g_test_add_func("/aio/multi/mutex", test_multi_co_mutex_10);
>          g_test_add_func("/aio/multi/mutex/handoff",
>                          test_multi_co_mutex_2_30);
> +#ifdef CONFIG_LINUX
> +        g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_10);
> +#endif
> +        g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_10);
>      }
>      return g_test_run();
>  }
> --
> 2.5.5