Hi Aaron,

It seems Travis CI cannot catch this timeout issue, but local runs record these two cases as TIMEOUT under the default timeout configuration.
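(For reference, meson's -t/--timeout-multiplier option scales each test's default time limit; the fast-tests suite uses a 10 s limit per test, so -t 30 allows up to 300 s. To reproduce just the two affected cases, something along these lines should work — build_test is simply my build directory name:

$ sudo meson test -C build_test -t 30 atomic_autotest mcslock_autotest
)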
1. The test results on the x86 server (72 lcores online @ 2.60 GHz) are:

a. Default setting:
$ sudo meson test -C build_test --suite DPDK:fast-tests
 3/92 DPDK:fast-tests / atomic_autotest    TIMEOUT  10.32s
39/92 DPDK:fast-tests / mcslock_autotest   TIMEOUT  10.32s

b. Extended timeout:
$ sudo meson test -C build_test --suite DPDK:fast-tests -t 30
 3/92 DPDK:fast-tests / atomic_autotest    OK      173.01s
39/92 DPDK:fast-tests / mcslock_autotest   OK       12.19s

2. The test results on the aarch64 server (56 lcores online @ 2.0 GHz) are:

a. Default setting:
$ sudo meson test -C build_test --suite DPDK:fast-tests
 3/92 DPDK:fast-tests / atomic_autotest    TIMEOUT  10.47s
39/92 DPDK:fast-tests / mcslock_autotest   TIMEOUT  10.47s

b. Extended timeout:
$ sudo meson test -C build_test --suite DPDK:fast-tests -t 60
 3/92 DPDK:fast-tests / atomic_autotest    OK      431.89s
39/92 DPDK:fast-tests / mcslock_autotest   OK       21.25s

So I think these two patches resolve the issue. I'd love to hear your thoughts.

Thanks,
Phil

<snip>
> Subject: [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests
>
> The MCS lock performance test takes more than 10 seconds and leads
> to meson test timeout on some platforms. Move the performance test
> into perf tests.
>
> Signed-off-by: Phil Yang <phil.y...@arm.com>
> Reviewed-by: Gavin Hu <gavin...@arm.com>
> ---
>  MAINTAINERS                  |   1 +
>  app/test/Makefile            |   1 +
>  app/test/autotest_data.py    |   6 +++
>  app/test/meson.build         |   2 +
>  app/test/test_mcslock.c      |  88 ------------------------------
>  app/test/test_mcslock_perf.c | 121 +++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 131 insertions(+), 88 deletions(-)
>  create mode 100644 app/test/test_mcslock_perf.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index db235c2..411bdeb 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -247,6 +247,7 @@ MCSlock - EXPERIMENTAL
>  M: Phil Yang <phil.y...@arm.com>
>  F: lib/librte_eal/common/include/generic/rte_mcslock.h
>  F: app/test/test_mcslock.c
> +F: app/test/test_mcslock_perf.c
>
>  Ticketlock
>  M: Joyce Kong <joyce.k...@arm.com>
> diff --git a/app/test/Makefile b/app/test/Makefile
> index 1f080d1..97de3ac 100644
> --- a/app/test/Makefile
> +++ b/app/test/Makefile
> @@ -65,6 +65,7 @@ SRCS-y += test_barrier.c
>  SRCS-y += test_malloc.c
>  SRCS-y += test_cycles.c
>  SRCS-y += test_mcslock.c
> +SRCS-y += test_mcslock_perf.c
>  SRCS-y += test_spinlock.c
>  SRCS-y += test_ticketlock.c
>  SRCS-y += test_memory.c
> diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
> index 7b1d013..2a4619d 100644
> --- a/app/test/autotest_data.py
> +++ b/app/test/autotest_data.py
> @@ -784,6 +784,12 @@
>          "Func":    default_autotest,
>          "Report":  None,
>      },
> +    {
> +        "Name":    "MCS Lock performance autotest",
> +        "Command": "mcslock_perf_autotest",
> +        "Func":    default_autotest,
> +        "Report":  None,
> +    },
>      #
>      # Please always make sure that ring_perf is the last test!
>      #
> diff --git a/app/test/meson.build b/app/test/meson.build
> index 0a2ce71..335a869 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -82,6 +82,7 @@ test_sources = files('commands.c',
>  	'test_meter.c',
>  	'test_metrics.c',
>  	'test_mcslock.c',
> +	'test_mcslock_perf.c',
>  	'test_mp_secondary.c',
>  	'test_per_lcore.c',
>  	'test_pmd_perf.c',
> @@ -270,6 +271,7 @@ perf_test_names = [
>  	'rand_perf_autotest',
>  	'hash_readwrite_perf_autotest',
>  	'hash_readwrite_lf_perf_autotest',
> +	'mcslock_perf_autotest',
>  ]
>
>  driver_test_names = [
> diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
> index e9359df..15f9751 100644
> --- a/app/test/test_mcslock.c
> +++ b/app/test/test_mcslock.c
> @@ -32,23 +32,16 @@
>   *
>   * - The function takes the global lock, display something, then releases
>   *   the global lock on each core.
> - *
> - * - A load test is carried out, with all cores attempting to lock a single
> - *   lock multiple times.
>   */
>
>  RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me);
>  RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me);
> -RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
>
>  rte_mcslock_t *p_ml;
>  rte_mcslock_t *p_ml_try;
> -rte_mcslock_t *p_ml_perf;
>
>  static unsigned int count;
>
> -static rte_atomic32_t synchro;
> -
>  static int
>  test_mcslock_per_core(__attribute__((unused)) void *arg)
>  {
> @@ -63,85 +56,8 @@ test_mcslock_per_core(__attribute__((unused)) void *arg)
>  	return 0;
>  }
>
> -static uint64_t time_count[RTE_MAX_LCORE] = {0};
> -
>  #define MAX_LOOP 1000000
>
> -static int
> -load_loop_fn(void *func_param)
> -{
> -	uint64_t time_diff = 0, begin;
> -	uint64_t hz = rte_get_timer_hz();
> -	volatile uint64_t lcount = 0;
> -	const int use_lock = *(int *)func_param;
> -	const unsigned int lcore = rte_lcore_id();
> -
> -	/**< Per core me node. */
> -	rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
> -
> -	/* wait synchro */
> -	while (rte_atomic32_read(&synchro) == 0)
> -		;
> -
> -	begin = rte_get_timer_cycles();
> -	while (lcount < MAX_LOOP) {
> -		if (use_lock)
> -			rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
> -
> -		lcount++;
> -		if (use_lock)
> -			rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
> -	}
> -	time_diff = rte_get_timer_cycles() - begin;
> -	time_count[lcore] = time_diff * 1000000 / hz;
> -	return 0;
> -}
> -
> -static int
> -test_mcslock_perf(void)
> -{
> -	unsigned int i;
> -	uint64_t total = 0;
> -	int lock = 0;
> -	const unsigned int lcore = rte_lcore_id();
> -
> -	printf("\nTest with no lock on single core...\n");
> -	rte_atomic32_set(&synchro, 1);
> -	load_loop_fn(&lock);
> -	printf("Core [%u] Cost Time = %"PRIu64" us\n",
> -			lcore, time_count[lcore]);
> -	memset(time_count, 0, sizeof(time_count));
> -
> -	printf("\nTest with lock on single core...\n");
> -	lock = 1;
> -	rte_atomic32_set(&synchro, 1);
> -	load_loop_fn(&lock);
> -	printf("Core [%u] Cost Time = %"PRIu64" us\n",
> -			lcore, time_count[lcore]);
> -	memset(time_count, 0, sizeof(time_count));
> -
> -	printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
> -
> -	rte_atomic32_set(&synchro, 0);
> -	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
> -
> -	/* start synchro and launch test on master */
> -	rte_atomic32_set(&synchro, 1);
> -	load_loop_fn(&lock);
> -
> -	rte_eal_mp_wait_lcore();
> -
> -	RTE_LCORE_FOREACH(i) {
> -		printf("Core [%u] Cost Time = %"PRIu64" us\n",
> -				i, time_count[i]);
> -		total += time_count[i];
> -	}
> -
> -	printf("Total Cost Time = %"PRIu64" us\n", total);
> -
> -	return 0;
> -}
> -
>  /*
>   * Use rte_mcslock_trylock() to trylock a mcs lock object,
>   * If it could not lock the object successfully, it would
> @@ -240,10 +156,6 @@ test_mcslock(void)
>  		ret = -1;
>  	rte_mcslock_unlock(&p_ml, &ml_me);
>
> -	/* mcs lock perf test */
> -	if (test_mcslock_perf() < 0)
> -		return -1;
> -
>  	return ret;
>  }
>
> diff --git a/app/test/test_mcslock_perf.c b/app/test/test_mcslock_perf.c
> new file mode 100644
> index 0000000..6948344
> --- /dev/null
> +++ b/app/test/test_mcslock_perf.c
> @@ -0,0 +1,121 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/queue.h>
> +
> +#include <rte_common.h>
> +#include <rte_memory.h>
> +#include <rte_per_lcore.h>
> +#include <rte_launch.h>
> +#include <rte_eal.h>
> +#include <rte_lcore.h>
> +#include <rte_cycles.h>
> +#include <rte_mcslock.h>
> +#include <rte_atomic.h>
> +
> +#include "test.h"
> +
> +/*
> + * RTE MCS lock perf test
> + * ======================
> + *
> + * These tests are derived from spin lock perf test cases.
> + *
> + * - A load test is carried out, with all cores attempting to lock a single
> + *   lock multiple times.
> + */
> +
> +RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
> +rte_mcslock_t *p_ml_perf;
> +
> +static rte_atomic32_t synchro;
> +static uint64_t time_count[RTE_MAX_LCORE] = {0};
> +
> +#define MAX_LOOP 1000000
> +
> +static int
> +load_loop_fn(void *func_param)
> +{
> +	uint64_t time_diff = 0, begin;
> +	uint64_t hz = rte_get_timer_hz();
> +	volatile uint64_t lcount = 0;
> +	const int use_lock = *(int *)func_param;
> +	const unsigned int lcore = rte_lcore_id();
> +
> +	/**< Per core me node. */
> +	rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
> +
> +	/* wait synchro */
> +	while (rte_atomic32_read(&synchro) == 0)
> +		;
> +
> +	begin = rte_get_timer_cycles();
> +	while (lcount < MAX_LOOP) {
> +		if (use_lock)
> +			rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
> +
> +		lcount++;
> +		if (use_lock)
> +			rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
> +	}
> +	time_diff = rte_get_timer_cycles() - begin;
> +	time_count[lcore] = time_diff * 1000000 / hz;
> +	return 0;
> +}
> +
> +/*
> + * Test rte_eal_get_lcore_state() in addition to mcs locks
> + * as we have "waiting" then "running" lcores.
> + */
> +static int
> +test_mcslock_perf(void)
> +{
> +	unsigned int i;
> +	uint64_t total = 0;
> +	int lock = 0;
> +	const unsigned int lcore = rte_lcore_id();
> +
> +	printf("\nTest with no lock on single core...\n");
> +	rte_atomic32_set(&synchro, 1);
> +	load_loop_fn(&lock);
> +	printf("Core [%u] Cost Time = %"PRIu64" us\n",
> +			lcore, time_count[lcore]);
> +	memset(time_count, 0, sizeof(time_count));
> +
> +	printf("\nTest with lock on single core...\n");
> +	lock = 1;
> +	rte_atomic32_set(&synchro, 1);
> +	load_loop_fn(&lock);
> +	printf("Core [%u] Cost Time = %"PRIu64" us\n",
> +			lcore, time_count[lcore]);
> +	memset(time_count, 0, sizeof(time_count));
> +
> +	printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
> +
> +	rte_atomic32_set(&synchro, 0);
> +	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
> +
> +	/* start synchro and launch test on master */
> +	rte_atomic32_set(&synchro, 1);
> +	load_loop_fn(&lock);
> +
> +	rte_eal_mp_wait_lcore();
> +
> +	RTE_LCORE_FOREACH(i) {
> +		printf("Core [%u] Cost Time = %"PRIu64" us\n",
> +				i, time_count[i]);
> +		total += time_count[i];
> +	}
> +
> +	printf("Total Cost Time = %"PRIu64" us\n", total);
> +
> +	return 0;
> +}
> +
> +REGISTER_TEST_COMMAND(mcslock_perf_autotest, test_mcslock_perf);
> --
> 2.7.4
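(For completeness: after this patch, mcslock_perf_autotest is registered in the perf-tests suite rather than fast-tests, so it no longer counts against the 10 s fast-test limit; if I read app/test/meson.build right, non-fast tests get the longer 600 s timeout. A sketch of how it can still be run on demand, reusing my build_test directory from above:

$ sudo meson test -C build_test --suite DPDK:perf-tests mcslock_perf_autotest

or interactively through the test binary:

$ sudo ./build_test/app/test/dpdk-test
RTE>> mcslock_perf_autotest
)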