<snip> > >> > >> Avoid code duplication by combining single and multi threaded tests > >> > >> Signed-off-by: Dharmik Thakkar <dharmik.thak...@arm.com> > >> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> > >> --- > >> app/test/test_lpm_perf.c | 362 > >> ++++++++++----------------------------- > >> 1 file changed, 91 insertions(+), 271 deletions(-) > >> > >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > >> index > >> 224c92fa3d65..229c835c23f7 100644 > >> --- a/app/test/test_lpm_perf.c > >> +++ b/app/test/test_lpm_perf.c > >> @@ -67,6 +67,12 @@ enum { > >> IP_CLASS_C > >> }; > >> > >> +enum { > >> +SINGLE_WRITER = 1, > >> +MULTI_WRITER_1, > >> +MULTI_WRITER_2 > >> +}; > > Do we need this? Can we use the number of cores instead? > > > > There are 3 combinations of writes (adds/deletes): > 1. Write all the entries - in case of a single writer 2. Write half of the > entries - > in case of multiple writers 3. Write remaining half of the entries - in case > of > multiple writers > > So, I think this is required. IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among the writer threads: 1) the total number of writers, and 2) the writer's core ID in the linear space. Creating a structure with these 2 parameters and passing it to the writer thread would be better and more scalable.
> > >> + > >> /* struct route_rule_count defines the total number of rules in > >> following a/b/c > >> * each item in a[]/b[]/c[] is the number of common IP address class > >> A/B/C, not > >> * including the ones for private local network. > >> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { > unsigned > >> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id = > >> (uint8_t)((uintptr_t)arg); > >> +uint8_t writer_id = (uint8_t)((uintptr_t)arg); > >> uint32_t next_hop_add = 0xAA; > >> > >> -/* 2 writer threads are used */ > >> -if (core_id % 2 == 0) { > >> +/* Single writer (writer_id = 1) */ > >> +if (writer_id == SINGLE_WRITER) { > >> +si = 0; > >> +ei = NUM_LDEPTH_ROUTE_ENTRIES; > >> +} > >> +/* 2 Writers (writer_id = 2/3)*/ > >> +else if (writer_id == MULTI_WRITER_1) { > >> si = 0; > >> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; > >> } else { > >> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0; > >> i < RCU_ITERATIONS; i++) { > >> /* Add all the entries */ > >> for (j = si; j < ei; j++) { > >> -pthread_mutex_lock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> +pthread_mutex_lock(&lpm_mutex); > >> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >> large_ldepth_route_table[j].depth, > >> next_hop_add) != 0) { > >> printf("Failed to add iteration %d, route# %d\n", i, j); > >> -pthread_mutex_unlock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> + > >> pthread_mutex_unlock(&lpm_mutex); > >> return -1; > >> } > >> -pthread_mutex_unlock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> +pthread_mutex_unlock(&lpm_mutex); > >> } > >> > >> /* Delete all the entries */ > >> for (j = si; j < ei; j++) { > >> -pthread_mutex_lock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> +pthread_mutex_lock(&lpm_mutex); > >> if (rte_lpm_delete(lpm, > >> large_ldepth_route_table[j].ip, > >> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete > >> iteration %d, route# %d\n", i, j); 
-pthread_mutex_unlock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> + > >> pthread_mutex_unlock(&lpm_mutex); > >> return -1; > >> } > >> -pthread_mutex_unlock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> +pthread_mutex_unlock(&lpm_mutex); > >> } > >> } > >> > >> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg) > >> > >> /* > >> * Functional test: > >> - * 2 writers, rest are readers > >> + * 1/2 writers, rest are readers > >> */ > >> static int > >> -test_lpm_rcu_perf_multi_writer(void) > >> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) > >> { > >> struct rte_lpm_config config; > >> size_t sz; > >> -unsigned int i; > >> +unsigned int i, j; > >> uint16_t core_id; > >> struct rte_lpm_rcu_config rcu_cfg = {0}; > >> +int (*reader_f)(void *arg) = NULL; > >> > >> if (rte_lcore_count() < 3) { > >> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at > >> least 3\n"); @@ -504,273 +522,76 @@ > >> test_lpm_rcu_perf_multi_writer(void) > >> num_cores++; > >> } > >> > >> -printf("\nPerf test: 2 writers, %d readers, RCU integration > >> enabled\n", -num_cores - 2); > >> - > >> -/* Create LPM table */ > >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > config.number_tbl8s = > >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> -TEST_LPM_ASSERT(lpm != NULL); > >> - > >> -/* Init RCU variable */ > >> -sz = rte_rcu_qsbr_get_memsize(num_cores); > >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); > >> - > >> -rcu_cfg.v = rv; > >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm, > >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto > >> error; -} > >> - > >> -writer_done = 0; > >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >> - > >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> - > >> -/* Launch reader threads */ > >> -for (i = 2; i < num_cores; 
i++) > >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > >> -enabled_core_ids[i]); > >> - > >> -/* Launch writer threads */ > >> -for (i = 0; i < 2; i++) > >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >> -(void *)(uintptr_t)i, > >> -enabled_core_ids[i]); > >> - > >> -/* Wait for writer threads */ > >> -for (i = 0; i < 2; i++) > >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; > >> - > >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: > >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, > >> __ATOMIC_RELAXED) -/ TOTAL_WRITES); > >> - > >> -writer_done = 1; > >> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores; > >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >> - > >> -rte_lpm_free(lpm); > >> -rte_free(rv); > >> -lpm = NULL; > >> -rv = NULL; > >> - > >> -/* Test without RCU integration */ > >> -printf("\nPerf test: 2 writers, %d readers, RCU integration > >> disabled\n", -num_cores - 2); > >> - > >> -/* Create LPM table */ > >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > config.number_tbl8s = > >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> -TEST_LPM_ASSERT(lpm != NULL); > >> - > >> -writer_done = 0; > >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> - > >> -/* Launch reader threads */ > >> -for (i = 2; i < num_cores; i++) > >> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]); > >> - > >> -/* Launch writer threads */ > >> -for (i = 0; i < 2; i++) > >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >> -(void *)(uintptr_t)i, > >> -enabled_core_ids[i]); > >> - > >> -/* Wait for writer threads */ > >> -for (i = 0; i < 2; i++) > >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; > >> - > >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); 
-printf("Total LPM > >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: > >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, > >> __ATOMIC_RELAXED) -/ TOTAL_WRITES); > >> - > >> -writer_done = 1; > >> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores; > >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >> - > >> -rte_lpm_free(lpm); > >> - > >> -return 0; > >> - > >> -error: > >> -writer_done = 1; > >> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore(); > >> - > >> -rte_lpm_free(lpm); > >> -rte_free(rv); > >> - > >> -return -1; > >> -} > >> - > >> -/* > >> - * Functional test: > >> - * Single writer, rest are readers > >> - */ > >> -static int > >> -test_lpm_rcu_perf(void) > >> -{ > >> -struct rte_lpm_config config; > >> -uint64_t begin, total_cycles; > >> -size_t sz; > >> -unsigned int i, j; > >> -uint16_t core_id; > >> -uint32_t next_hop_add = 0xAA; > >> -struct rte_lpm_rcu_config rcu_cfg = {0}; > >> - > >> -if (rte_lcore_count() < 2) { > >> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at > >> least 2\n"); -return TEST_SKIPPED; -} > >> - > >> -num_cores = 0; > >> -RTE_LCORE_FOREACH_WORKER(core_id) { > >> -enabled_core_ids[num_cores] = core_id; -num_cores++; -} > >> - > >> -printf("\nPerf test: 1 writer, %d readers, RCU integration > >> enabled\n", -num_cores); > >> - > >> -/* Create LPM table */ > >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > config.number_tbl8s = > >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> -TEST_LPM_ASSERT(lpm != NULL); > >> - > >> -/* Init RCU variable */ > >> -sz = rte_rcu_qsbr_get_memsize(num_cores); > >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); > >> - > >> -rcu_cfg.v = rv; > >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm, > >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto 
> >> error; -} > >> - > >> -writer_done = 0; > >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> - > >> -/* Launch reader threads */ > >> -for (i = 0; i < num_cores; i++) > >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > >> -enabled_core_ids[i]); > >> - > >> -/* Measure add/delete. */ > >> -begin = rte_rdtsc_precise(); > >> -for (i = 0; i < RCU_ITERATIONS; i++) { > >> -/* Add all the entries */ > >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm, > >> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth, > >> -next_hop_add) != 0) { > >> -printf("Failed to add iteration %d, route# %d\n", -i, j); > >> +for (j = 1; j < 3; j++) { > >> +if (use_rcu) > >> +printf("\nPerf test: %d writer(s), %d reader(s)," > >> + " RCU integration enabled\n", j, num_cores - j); else > >> +printf("\nPerf test: %d writer(s), %d reader(s)," > >> + " RCU integration disabled\n", j, num_cores - j); > >> + > >> +/* Create LPM table */ > >> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > config.number_tbl8s = > >> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm = > >> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> +TEST_LPM_ASSERT(lpm != NULL); > >> + > >> +/* Init RCU variable */ > >> +if (use_rcu) { > >> +sz = rte_rcu_qsbr_get_memsize(num_cores); > >> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >> + > >> RTE_CACHE_LINE_SIZE); > >> +rte_rcu_qsbr_init(rv, num_cores); > >> + > >> +rcu_cfg.v = rv; > >> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm, > >> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n"); > >> goto error; > >> } > >> > >> -/* Delete all the entries */ > >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > >> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete > >> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles > >> = rte_rdtsc_precise() - begin; > >> +reader_f = 
test_lpm_rcu_qsbr_reader; } else reader_f = > >> +test_lpm_reader; > >> > >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g > >> cycles\n", -(double)total_cycles / TOTAL_WRITES); > >> +writer_done = 0; > >> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >> > >> -writer_done = 1; > >> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores; > >> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]); > >> - > >> -rte_lpm_free(lpm); > >> -rte_free(rv); > >> -lpm = NULL; > >> -rv = NULL; > >> - > >> -/* Test without RCU integration */ > >> -printf("\nPerf test: 1 writer, %d readers, RCU integration > >> disabled\n", -num_cores); > >> - > >> -/* Create LPM table */ > >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > config.number_tbl8s = > >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> -TEST_LPM_ASSERT(lpm != NULL); > >> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> > >> -writer_done = 0; > >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> +/* Launch reader threads */ > >> +for (i = j; i < num_cores; i++) > >> +rte_eal_remote_launch(reader_f, NULL, > >> +enabled_core_ids[i]); > >> > >> -/* Launch reader threads */ > >> -for (i = 0; i < num_cores; i++) > >> -rte_eal_remote_launch(test_lpm_reader, NULL, > >> -enabled_core_ids[i]); > >> +/* Launch writer threads */ > >> +for (i = 0; i < j; i++) > >> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >> +(void *)(uintptr_t)(i + j), > > This can be just 'j'? > > > >> +enabled_core_ids[i]); > >> > >> -/* Measure add/delete. 
*/ > >> -begin = rte_rdtsc_precise(); > >> -for (i = 0; i < RCU_ITERATIONS; i++) { > >> -/* Add all the entries */ > >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > >> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >> -large_ldepth_route_table[j].depth, > >> -next_hop_add) != 0) { > >> -printf("Failed to add iteration %d, route# > >> %d\n", > >> -i, j); > >> +/* Wait for writer threads */ > >> +for (i = 0; i < j; i++) > >> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > >> goto error; > >> -} > >> > >> -/* Delete all the entries */ > >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > >> -if (rte_lpm_delete(lpm, > >> large_ldepth_route_table[j].ip, > >> -large_ldepth_route_table[j].depth) != 0) { > >> -printf("Failed to delete iteration %d, route# > >> %d\n", > >> -i, j); > >> -goto error; > >> -} > >> +printf("Total LPM Adds: %d\n", TOTAL_WRITES); > >> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > >> +printf("Average LPM Add/Del: %"PRIu64" cycles\n", > >> +__atomic_load_n(&gwrite_cycles, > >> __ATOMIC_RELAXED) > >> +/ TOTAL_WRITES); > >> + > >> +writer_done = 1; > >> +/* Wait until all readers have exited */ > >> +for (i = j; i < num_cores; i++) > >> +rte_eal_wait_lcore(enabled_core_ids[i]); > >> + > >> +rte_lpm_free(lpm); > >> +rte_free(rv); > >> +lpm = NULL; > >> +rv = NULL; > >> } > >> -total_cycles = rte_rdtsc_precise() - begin; > >> - > >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); > >> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > >> -printf("Average LPM Add/Del: %g cycles\n", > >> -(double)total_cycles / TOTAL_WRITES); > >> - > >> -writer_done = 1; > >> -/* Wait until all readers have exited */ > >> -for (i = 0; i < num_cores; i++) > >> -rte_eal_wait_lcore(enabled_core_ids[i]); > >> - > >> -rte_lpm_free(lpm); > >> > >> return 0; > >> > >> @@ -946,9 +767,8 @@ test_lpm_perf(void) > >> rte_lpm_delete_all(lpm); > >> rte_lpm_free(lpm); > >> > >> -test_lpm_rcu_perf(); > >> - > >> -test_lpm_rcu_perf_multi_writer(); > >> 
+test_lpm_rcu_perf_multi_writer(0); > >> +test_lpm_rcu_perf_multi_writer(1); > >> > >> return 0; > >> } > >> -- > >> 2.17.1 >