[08/11] test-aio-multithread: add performance comparison with thread-based mutexes

Message ID 1460719926-12950-9-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived

Commit Message

Paolo Bonzini April 15, 2016, 11:32 a.m. UTC
Add two implementations of the same benchmark as the previous patch,
but using pthreads.  One uses a normal QemuMutex, the other is Linux
only and implements a fair mutex based on MCS locks and futexes.
This shows that the slower performance of the 5-thread case is due to
the fairness of CoMutex, rather than to coroutines.  If fairness does
not matter, as is the case with two threads, CoMutex can actually be
faster than pthreads.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tests/test-aio-multithread.c | 152 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)
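
The fairness cost mentioned above comes from the handoff: a fair lock such
as CoMutex (or the MCS lock added below) passes the mutex to the oldest
waiter on every unlock, so each iteration pays for a wake-up, while an
unfair lock lets the releasing thread grab the lock again immediately.
For contrast only, a minimal sketch of such an unfair test-and-set lock,
using atomic_xchg()/atomic_set() as in qemu/atomic.h (this is not part of
the patch):

    static int tas_lock_word;

    /* Unfair: whoever wins the 0->1 exchange gets the lock, no matter
     * how long the other contenders have already been spinning.
     */
    static void tas_lock(void)
    {
        while (atomic_xchg(&tas_lock_word, 1)) {
            /* Spin; the thread that just released often wins again. */
        }
    }

    static void tas_unlock(void)
    {
        atomic_set(&tas_lock_word, 0);
    }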

Comments

Fam Zheng April 29, 2016, 6:52 a.m. UTC | #1
On Fri, 04/15 13:32, Paolo Bonzini wrote:
> Add two implementations of the same benchmark as the previous patch,
> but using pthreads.  One uses a normal QemuMutex, the other is Linux
> only and implements a fair mutex based on MCS locks and futexes.
> This shows that the slower performance of the 5-thread case is due to
> the fairness of CoMutex, rather than to coroutines.  If fairness does
> not matter, as is the case with two threads, CoMutex can actually be
> faster than pthreads.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  tests/test-aio-multithread.c | 152 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 152 insertions(+)
> 
> diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
> index 18b3548..d7bc1bf 100644
> --- a/tests/test-aio-multithread.c
> +++ b/tests/test-aio-multithread.c
> @@ -279,6 +279,150 @@ static void test_multi_co_mutex_2_30(void)
>      test_multi_co_mutex(2, 30);
>  }
>  
> +/* Same test with fair mutexes, for performance comparison.  */
> +
> +#ifdef CONFIG_LINUX
> +#include "qemu/futex.h"

Do we have qemu/futex.h?

> +
> +/* The nodes for the mutex reside in this structure (on which we try to avoid
> + * false sharing).  The head of the mutex is in the "mutex_head" variable.
> + */
> +static struct {
> +    int next, locked;
> +    int padding[14];
> +} nodes[NUM_CONTEXTS] __attribute__((__aligned__(64)));
> +
> +static int mutex_head = -1;
> +
> +static void mcs_mutex_lock(void)
> +{
> +    int prev;
> +
> +    nodes[id].next = -1;
> +    nodes[id].locked = 1;
> +    prev = atomic_xchg(&mutex_head, id);
> +    if (prev != -1) {
> +        atomic_set(&nodes[prev].next, id);
> +        futex_wait(&nodes[id].locked, 1);
> +    }
> +}
> +
> +static void mcs_mutex_unlock(void)
> +{
> +    int next;
> +    if (nodes[id].next == -1) {
> +        if (atomic_read(&mutex_head) == id &&
> +            atomic_cmpxchg(&mutex_head, id, -1) == id) {
> +            /* Last item in the list, exit.  */
> +            return;
> +        }
> +        while (atomic_read(&nodes[id].next) == -1) {
> +            /* Spin... */
> +        }
> +    }
> +
> +    /* Wake up the next in line.  */
> +    next = nodes[id].next;
> +    nodes[next].locked = 0;
> +    futex_wake(&nodes[next].locked, 1);
> +}
> +
> +static void test_multi_fair_mutex_entry(void *opaque)
> +{
> +    while (!atomic_mb_read(&now_stopping)) {
> +        mcs_mutex_lock();
> +        counter++;
> +        mcs_mutex_unlock();
> +        atomic_inc(&atomic_counter);
> +    }
> +
> +}
> +
> +static void test_multi_fair_mutex(int threads, int seconds)
> +{
> +    int i;
> +
> +    assert(mutex_head == -1);
> +    counter = 0;
> +    atomic_counter = 0;
> +    now_stopping = false;
> +
> +    create_aio_contexts();
> +    assert(threads <= NUM_CONTEXTS);
> +    for (i = 0; i < threads; i++) {
> +        Coroutine *co1 = qemu_coroutine_create(test_multi_fair_mutex_entry);
> +        aio_co_schedule(ctx[i], co1);
> +    }
> +
> +    g_usleep(seconds * 1000000);
> +
> +    atomic_mb_set(&now_stopping, true);
> +    join_aio_contexts();
> +    g_test_message("%d iterations/second\n", counter / seconds);
> +    g_assert_cmpint(counter, ==, atomic_counter);
> +}
> +
> +static void test_multi_fair_mutex_1(void)
> +{
> +    test_multi_fair_mutex(NUM_CONTEXTS, 1);
> +}
> +
> +static void test_multi_fair_mutex_10(void)
> +{
> +    test_multi_fair_mutex(NUM_CONTEXTS, 10);
> +}
> +#endif
> +
> +/* Same test with pthread mutexes, for performance comparison and
> + * portability.  */
> +
> +static QemuMutex mutex;
> +
> +static void test_multi_mutex_entry(void *opaque)
> +{
> +    while (!atomic_mb_read(&now_stopping)) {
> +        qemu_mutex_lock(&mutex);
> +        counter++;
> +        qemu_mutex_unlock(&mutex);
> +        atomic_inc(&atomic_counter);
> +    }
> +
> +}
> +
> +static void test_multi_mutex(int threads, int seconds)
> +{
> +    int i;
> +
> +    qemu_mutex_init(&mutex);
> +    counter = 0;
> +    atomic_counter = 0;
> +    now_stopping = false;
> +
> +    create_aio_contexts();
> +    assert(threads <= NUM_CONTEXTS);
> +    for (i = 0; i < threads; i++) {
> +        Coroutine *co1 = qemu_coroutine_create(test_multi_mutex_entry);
> +        aio_co_schedule(ctx[i], co1);
> +    }
> +
> +    g_usleep(seconds * 1000000);
> +
> +    atomic_mb_set(&now_stopping, true);
> +    join_aio_contexts();
> +    g_test_message("%d iterations/second\n", counter / seconds);
> +    g_assert_cmpint(counter, ==, atomic_counter);
> +}
> +
> +static void test_multi_mutex_1(void)
> +{
> +    test_multi_mutex(NUM_CONTEXTS, 1);
> +}
> +
> +static void test_multi_mutex_10(void)
> +{
> +    test_multi_mutex(NUM_CONTEXTS, 10);
> +}
> +
>  /* End of tests.  */
>  
>  int main(int argc, char **argv)
> @@ -291,10 +435,18 @@ int main(int argc, char **argv)
>          g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
>          g_test_add_func("/aio/multi/mutex", test_multi_co_mutex_1);
>          g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
> +#ifdef CONFIG_LINUX
> +        g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_1);
> +#endif
> +        g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_1);
>      } else {
>          g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
>          g_test_add_func("/aio/multi/mutex", test_multi_co_mutex_10);
>          g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_30);
> +#ifdef CONFIG_LINUX
> +        g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_10);
> +#endif
> +        g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_10);
>      }
>      return g_test_run();
>  }
> -- 
> 2.5.5
> 
>
Paolo Bonzini May 12, 2016, 4:49 p.m. UTC | #2
> >  tests/test-aio-multithread.c | 152
> >  +++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 152 insertions(+)
> > 
> > diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
> > index 18b3548..d7bc1bf 100644
> > --- a/tests/test-aio-multithread.c
> > +++ b/tests/test-aio-multithread.c
> > @@ -279,6 +279,150 @@ static void test_multi_co_mutex_2_30(void)
> >      test_multi_co_mutex(2, 30);
> >  }
> >  
> > +/* Same test with fair mutexes, for performance comparison.  */
> > +
> > +#ifdef CONFIG_LINUX
> > +#include "qemu/futex.h"
> 
> Do we have qemu/futex.h?

It must be somewhere in the previous 50 patches...  QemuLockCnt adds it.

Paolo
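
For reference, futex_wait() and futex_wake() as used by the MCS lock above
are thin wrappers around the Linux futex(2) syscall.  A minimal sketch of
what such a header would provide (the names match the calls in the patch;
the actual contents of qemu/futex.h are an assumption here):

    #include <unistd.h>
    #include <stdlib.h>
    #include <errno.h>
    #include <sys/syscall.h>
    #include <linux/futex.h>

    #define futex(...)  syscall(__NR_futex, __VA_ARGS__)

    /* Wake up to 'n' waiters blocked on '*f'. */
    static inline void futex_wake(void *f, int n)
    {
        futex(f, FUTEX_WAKE, n, NULL, NULL, 0);
    }

    /* Block as long as '*f == val'; return once woken or if the value
     * has already changed.
     */
    static inline void futex_wait(void *f, unsigned val)
    {
        while (futex(f, FUTEX_WAIT, (int) val, NULL, NULL, 0)) {
            switch (errno) {
            case EWOULDBLOCK:
                return;     /* *f no longer equals val */
            case EINTR:
                break;      /* interrupted, retry */
            default:
                abort();
            }
        }
    }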


Patch

diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
index 18b3548..d7bc1bf 100644
--- a/tests/test-aio-multithread.c
+++ b/tests/test-aio-multithread.c
@@ -279,6 +279,150 @@  static void test_multi_co_mutex_2_30(void)
     test_multi_co_mutex(2, 30);
 }
 
+/* Same test with fair mutexes, for performance comparison.  */
+
+#ifdef CONFIG_LINUX
+#include "qemu/futex.h"
+
+/* The nodes for the mutex reside in this structure (on which we try to avoid
+ * false sharing).  The head of the mutex is in the "mutex_head" variable.
+ */
+static struct {
+    int next, locked;
+    int padding[14];
+} nodes[NUM_CONTEXTS] __attribute__((__aligned__(64)));
+
+static int mutex_head = -1;
+
+static void mcs_mutex_lock(void)
+{
+    int prev;
+
+    nodes[id].next = -1;
+    nodes[id].locked = 1;
+    prev = atomic_xchg(&mutex_head, id);
+    if (prev != -1) {
+        atomic_set(&nodes[prev].next, id);
+        futex_wait(&nodes[id].locked, 1);
+    }
+}
+
+static void mcs_mutex_unlock(void)
+{
+    int next;
+    if (nodes[id].next == -1) {
+        if (atomic_read(&mutex_head) == id &&
+            atomic_cmpxchg(&mutex_head, id, -1) == id) {
+            /* Last item in the list, exit.  */
+            return;
+        }
+        while (atomic_read(&nodes[id].next) == -1) {
+            /* Spin... */
+        }
+    }
+
+    /* Wake up the next in line.  */
+    next = nodes[id].next;
+    nodes[next].locked = 0;
+    futex_wake(&nodes[next].locked, 1);
+}
+
+static void test_multi_fair_mutex_entry(void *opaque)
+{
+    while (!atomic_mb_read(&now_stopping)) {
+        mcs_mutex_lock();
+        counter++;
+        mcs_mutex_unlock();
+        atomic_inc(&atomic_counter);
+    }
+
+}
+
+static void test_multi_fair_mutex(int threads, int seconds)
+{
+    int i;
+
+    assert(mutex_head == -1);
+    counter = 0;
+    atomic_counter = 0;
+    now_stopping = false;
+
+    create_aio_contexts();
+    assert(threads <= NUM_CONTEXTS);
+    for (i = 0; i < threads; i++) {
+        Coroutine *co1 = qemu_coroutine_create(test_multi_fair_mutex_entry);
+        aio_co_schedule(ctx[i], co1);
+    }
+
+    g_usleep(seconds * 1000000);
+
+    atomic_mb_set(&now_stopping, true);
+    join_aio_contexts();
+    g_test_message("%d iterations/second\n", counter / seconds);
+    g_assert_cmpint(counter, ==, atomic_counter);
+}
+
+static void test_multi_fair_mutex_1(void)
+{
+    test_multi_fair_mutex(NUM_CONTEXTS, 1);
+}
+
+static void test_multi_fair_mutex_10(void)
+{
+    test_multi_fair_mutex(NUM_CONTEXTS, 10);
+}
+#endif
+
+/* Same test with pthread mutexes, for performance comparison and
+ * portability.  */
+
+static QemuMutex mutex;
+
+static void test_multi_mutex_entry(void *opaque)
+{
+    while (!atomic_mb_read(&now_stopping)) {
+        qemu_mutex_lock(&mutex);
+        counter++;
+        qemu_mutex_unlock(&mutex);
+        atomic_inc(&atomic_counter);
+    }
+
+}
+
+static void test_multi_mutex(int threads, int seconds)
+{
+    int i;
+
+    qemu_mutex_init(&mutex);
+    counter = 0;
+    atomic_counter = 0;
+    now_stopping = false;
+
+    create_aio_contexts();
+    assert(threads <= NUM_CONTEXTS);
+    for (i = 0; i < threads; i++) {
+        Coroutine *co1 = qemu_coroutine_create(test_multi_mutex_entry);
+        aio_co_schedule(ctx[i], co1);
+    }
+
+    g_usleep(seconds * 1000000);
+
+    atomic_mb_set(&now_stopping, true);
+    join_aio_contexts();
+    g_test_message("%d iterations/second\n", counter / seconds);
+    g_assert_cmpint(counter, ==, atomic_counter);
+}
+
+static void test_multi_mutex_1(void)
+{
+    test_multi_mutex(NUM_CONTEXTS, 1);
+}
+
+static void test_multi_mutex_10(void)
+{
+    test_multi_mutex(NUM_CONTEXTS, 10);
+}
+
 /* End of tests.  */
 
 int main(int argc, char **argv)
@@ -291,10 +435,18 @@  int main(int argc, char **argv)
         g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
         g_test_add_func("/aio/multi/mutex", test_multi_co_mutex_1);
         g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
+#ifdef CONFIG_LINUX
+        g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_1);
+#endif
+        g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_1);
     } else {
         g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
         g_test_add_func("/aio/multi/mutex", test_multi_co_mutex_10);
         g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_30);
+#ifdef CONFIG_LINUX
+        g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_10);
+#endif
+        g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_10);
     }
     return g_test_run();
 }
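
Once built, the new cases can be run on their own with GLib's test options,
e.g. "tests/test-aio-multithread -p /aio/multi/mutex/mcs" or
"-p /aio/multi/mutex/pthread".  Per the two branches of main() above, the
1-second variants are registered in one test mode and the 10-second variants
in the other (presumably quick vs. slow/thorough, e.g. -m thorough); the
condition that selects between them is outside the hunks shown, so the exact
switch is an assumption.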