Phil Yang <phil.y...@arm.com> writes: > Hi Aaron, > > It seems Travis CI cannot capture this timeout issue.
That's probably because we use -t 3 to multiply the timeout. We needed to do that because a few of the tests were on the cusp of failure in the Travis environment since it's a bit lower power. In more brawny systems, probably they aren't an issue. > But the local test records these two cases as TIMEOUT under the default > timeout configuration. > > 1. The test results on x86 server (72 lcores online@ 2.60GHz) are: > a. Default setting: > $ sudo meson test -C build_test --suite DPDK:fast-tests > 3/92 DPDK:fast-tests / atomic_autotest TIMEOUT 10.32s > 39/92 DPDK:fast-tests / mcslock_autotest TIMEOUT 10.32s > > b. Extend timeout: > $ sudo meson test -C build_test --suite DPDK:fast-tests -t 30 > 3/92 DPDK:fast-tests / atomic_autotest OK > 173.01s > 39/92 DPDK:fast-tests / mcslock_autotest OK 12.19s > > 2. The test results on aarch64 server (56 lcore online @ 2.0GHz) are: > a. Default setting: > $ sudo meson test -C build_test --suite DPDK:fast-tests > 3/92 DPDK:fast-tests / atomic_autotest TIMEOUT 10.47 s > 39/92 DPDK:fast-tests / mcslock_autotest TIMEOUT 10.47 s > > b. Extend timeout: > $ sudo meson test -C build_test --suite DPDK:fast-tests -t 60 > 3/92 DPDK:fast-tests / atomic_autotest OK 431.89 s > 39/92 DPDK:fast-tests / mcslock_autotest OK 21.25 s In all the cases it seems the mcslock_autotest is really not too far off, but the atomic_autotest looks really long no matter what. > So I think these two patches can resolve this issue. I'd love to hear your > thoughts. In general, I'm always happy to see perf tests more appropriately put under perf area. I don't have much opinion on this specific patch, though. > Thanks, > Phil > > <snip> > >> Subject: [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to >> perf tests >> >> The MCS lock performance test takes more than 10 seconds and leads >> to meson test timeout on some platforms. Move the performance test >> into perf tests. >> >> Signed-off-by: Phil Yang <phil.y...@arm.com> >> Reviewed-by: Gavin Hu <gavin...@arm.com> >> --- >> MAINTAINERS | 1 + >> app/test/Makefile | 1 + >> app/test/autotest_data.py | 6 +++ >> app/test/meson.build | 2 + >> app/test/test_mcslock.c | 88 ------------------------------- >> app/test/test_mcslock_perf.c | 121 >> +++++++++++++++++++++++++++++++++++++++++++ >> 6 files changed, 131 insertions(+), 88 deletions(-) >> create mode 100644 app/test/test_mcslock_perf.c >> >> diff --git a/MAINTAINERS b/MAINTAINERS >> index db235c2..411bdeb 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -247,6 +247,7 @@ MCSlock - EXPERIMENTAL >> M: Phil Yang <phil.y...@arm.com> >> F: lib/librte_eal/common/include/generic/rte_mcslock.h >> F: app/test/test_mcslock.c >> +F: app/test/test_mcslock_perf.c >> >> Ticketlock >> M: Joyce Kong <joyce.k...@arm.com> >> diff --git a/app/test/Makefile b/app/test/Makefile >> index 1f080d1..97de3ac 100644 >> --- a/app/test/Makefile >> +++ b/app/test/Makefile >> @@ -65,6 +65,7 @@ SRCS-y += test_barrier.c >> SRCS-y += test_malloc.c >> SRCS-y += test_cycles.c >> SRCS-y += test_mcslock.c >> +SRCS-y += test_mcslock_perf.c >> SRCS-y += test_spinlock.c >> SRCS-y += test_ticketlock.c >> SRCS-y += test_memory.c >> diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py >> index 7b1d013..2a4619d 100644 >> --- a/app/test/autotest_data.py >> +++ b/app/test/autotest_data.py >> @@ -784,6 +784,12 @@ >> "Func": default_autotest, >> "Report": None, >> }, >> + { >> + "Name": "MCS Lock performance autotest", >> + "Command": "mcslock_perf_autotest", >> + "Func": default_autotest, >> + "Report": None, >> + }, >> # >> # Please always make sure that ring_perf is the last test! >> # >> diff --git a/app/test/meson.build b/app/test/meson.build >> index 0a2ce71..335a869 100644 >> --- a/app/test/meson.build >> +++ b/app/test/meson.build >> @@ -82,6 +82,7 @@ test_sources = files('commands.c', >> 'test_meter.c', >> 'test_metrics.c', >> 'test_mcslock.c', >> + 'test_mcslock_perf.c', >> 'test_mp_secondary.c', >> 'test_per_lcore.c', >> 'test_pmd_perf.c', >> @@ -270,6 +271,7 @@ perf_test_names = [ >> 'rand_perf_autotest', >> 'hash_readwrite_perf_autotest', >> 'hash_readwrite_lf_perf_autotest', >> + 'mcslock_perf_autotest', >> ] >> >> driver_test_names = [ >> diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c >> index e9359df..15f9751 100644 >> --- a/app/test/test_mcslock.c >> +++ b/app/test/test_mcslock.c >> @@ -32,23 +32,16 @@ >> * >> * - The function takes the global lock, display something, then releases >> * the global lock on each core. >> - * >> - * - A load test is carried out, with all cores attempting to lock a single >> - * lock multiple times. >> */ >> >> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me); >> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me); >> -RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me); >> >> rte_mcslock_t *p_ml; >> rte_mcslock_t *p_ml_try; >> -rte_mcslock_t *p_ml_perf; >> >> static unsigned int count; >> >> -static rte_atomic32_t synchro; >> - >> static int >> test_mcslock_per_core(__attribute__((unused)) void *arg) >> { >> @@ -63,85 +56,8 @@ test_mcslock_per_core(__attribute__((unused)) void >> *arg) >> return 0; >> } >> >> -static uint64_t time_count[RTE_MAX_LCORE] = {0}; >> - >> #define MAX_LOOP 1000000 >> >> -static int >> -load_loop_fn(void *func_param) >> -{ >> - uint64_t time_diff = 0, begin; >> - uint64_t hz = rte_get_timer_hz(); >> - volatile uint64_t lcount = 0; >> - const int use_lock = *(int *)func_param; >> - const unsigned int lcore = rte_lcore_id(); >> - >> - /**< Per core me node. */ >> - rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me); >> - >> - /* wait synchro */ >> - while (rte_atomic32_read(&synchro) == 0) >> - ; >> - >> - begin = rte_get_timer_cycles(); >> - while (lcount < MAX_LOOP) { >> - if (use_lock) >> - rte_mcslock_lock(&p_ml_perf, &ml_perf_me); >> - >> - lcount++; >> - if (use_lock) >> - rte_mcslock_unlock(&p_ml_perf, &ml_perf_me); >> - } >> - time_diff = rte_get_timer_cycles() - begin; >> - time_count[lcore] = time_diff * 1000000 / hz; >> - return 0; >> -} >> - >> -static int >> -test_mcslock_perf(void) >> -{ >> - unsigned int i; >> - uint64_t total = 0; >> - int lock = 0; >> - const unsigned int lcore = rte_lcore_id(); >> - >> - printf("\nTest with no lock on single core...\n"); >> - rte_atomic32_set(&synchro, 1); >> - load_loop_fn(&lock); >> - printf("Core [%u] Cost Time = %"PRIu64" us\n", >> - lcore, time_count[lcore]); >> - memset(time_count, 0, sizeof(time_count)); >> - >> - printf("\nTest with lock on single core...\n"); >> - lock = 1; >> - rte_atomic32_set(&synchro, 1); >> - load_loop_fn(&lock); >> - printf("Core [%u] Cost Time = %"PRIu64" us\n", >> - lcore, time_count[lcore]); >> - memset(time_count, 0, sizeof(time_count)); >> - >> - printf("\nTest with lock on %u cores...\n", (rte_lcore_count())); >> - >> - rte_atomic32_set(&synchro, 0); >> - rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER); >> - >> - /* start synchro and launch test on master */ >> - rte_atomic32_set(&synchro, 1); >> - load_loop_fn(&lock); >> - >> - rte_eal_mp_wait_lcore(); >> - >> - RTE_LCORE_FOREACH(i) { >> - printf("Core [%u] Cost Time = %"PRIu64" us\n", >> - i, time_count[i]); >> - total += time_count[i]; >> - } >> - >> - printf("Total Cost Time = %"PRIu64" us\n", total); >> - >> - return 0; >> -} >> - >> /* >> * Use rte_mcslock_trylock() to trylock a mcs lock object, >> * If it could not lock the object successfully, it would >> @@ -240,10 +156,6 @@ test_mcslock(void) >> ret = -1; >> rte_mcslock_unlock(&p_ml, &ml_me); >> >> - /* mcs lock perf test */ >> - if (test_mcslock_perf() < 0) >> - return -1; >> - >> return ret; >> } >> >> diff --git a/app/test/test_mcslock_perf.c b/app/test/test_mcslock_perf.c >> new file mode 100644 >> index 0000000..6948344 >> --- /dev/null >> +++ b/app/test/test_mcslock_perf.c >> @@ -0,0 +1,121 @@ >> +/* SPDX-License-Identifier: BSD-3-Clause >> + * Copyright(c) 2019 Arm Limited >> + */ >> + >> +#include <stdio.h> >> +#include <stdint.h> >> +#include <inttypes.h> >> +#include <string.h> >> +#include <unistd.h> >> +#include <sys/queue.h> >> + >> +#include <rte_common.h> >> +#include <rte_memory.h> >> +#include <rte_per_lcore.h> >> +#include <rte_launch.h> >> +#include <rte_eal.h> >> +#include <rte_lcore.h> >> +#include <rte_cycles.h> >> +#include <rte_mcslock.h> >> +#include <rte_atomic.h> >> + >> +#include "test.h" >> + >> +/* >> + * RTE MCS lock perf test >> + * ====================== >> + * >> + * These tests are derived from spin lock perf test cases. >> + * >> + * - A load test is carried out, with all cores attempting to lock a single >> + * lock multiple times. >> + */ >> + >> +RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me); >> +rte_mcslock_t *p_ml_perf; >> + >> +static rte_atomic32_t synchro; >> +static uint64_t time_count[RTE_MAX_LCORE] = {0}; >> + >> +#define MAX_LOOP 1000000 >> + >> +static int >> +load_loop_fn(void *func_param) >> +{ >> + uint64_t time_diff = 0, begin; >> + uint64_t hz = rte_get_timer_hz(); >> + volatile uint64_t lcount = 0; >> + const int use_lock = *(int *)func_param; >> + const unsigned int lcore = rte_lcore_id(); >> + >> + /**< Per core me node. */ >> + rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me); >> + >> + /* wait synchro */ >> + while (rte_atomic32_read(&synchro) == 0) >> + ; >> + >> + begin = rte_get_timer_cycles(); >> + while (lcount < MAX_LOOP) { >> + if (use_lock) >> + rte_mcslock_lock(&p_ml_perf, &ml_perf_me); >> + >> + lcount++; >> + if (use_lock) >> + rte_mcslock_unlock(&p_ml_perf, &ml_perf_me); >> + } >> + time_diff = rte_get_timer_cycles() - begin; >> + time_count[lcore] = time_diff * 1000000 / hz; >> + return 0; >> +} >> + >> +/* >> + * Test rte_eal_get_lcore_state() in addition to mcs locks >> + * as we have "waiting" then "running" lcores. >> + */ >> +static int >> +test_mcslock_perf(void) >> +{ >> + unsigned int i; >> + uint64_t total = 0; >> + int lock = 0; >> + const unsigned int lcore = rte_lcore_id(); >> + >> + printf("\nTest with no lock on single core...\n"); >> + rte_atomic32_set(&synchro, 1); >> + load_loop_fn(&lock); >> + printf("Core [%u] Cost Time = %"PRIu64" us\n", >> + lcore, time_count[lcore]); >> + memset(time_count, 0, sizeof(time_count)); >> + >> + printf("\nTest with lock on single core...\n"); >> + lock = 1; >> + rte_atomic32_set(&synchro, 1); >> + load_loop_fn(&lock); >> + printf("Core [%u] Cost Time = %"PRIu64" us\n", >> + lcore, time_count[lcore]); >> + memset(time_count, 0, sizeof(time_count)); >> + >> + printf("\nTest with lock on %u cores...\n", (rte_lcore_count())); >> + >> + rte_atomic32_set(&synchro, 0); >> + rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER); >> + >> + /* start synchro and launch test on master */ >> + rte_atomic32_set(&synchro, 1); >> + load_loop_fn(&lock); >> + >> + rte_eal_mp_wait_lcore(); >> + >> + RTE_LCORE_FOREACH(i) { >> + printf("Core [%u] Cost Time = %"PRIu64" us\n", >> + i, time_count[i]); >> + total += time_count[i]; >> + } >> + >> + printf("Total Cost Time = %"PRIu64" us\n", total); >> + >> + return 0; >> +} >> + >> +REGISTER_TEST_COMMAND(mcslock_perf_autotest, test_mcslock_perf); >> -- >> 2.7.4