<snip> > >>>> > >>>> Avoid code duplication by combining single and multi threaded tests > >>>> > >>>> Signed-off-by: Dharmik Thakkar <dharmik.thak...@arm.com> > >>>> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> > >>>> --- > >>>> app/test/test_lpm_perf.c | 362 > >>>> ++++++++++----------------------------- > >>>> 1 file changed, 91 insertions(+), 271 deletions(-) > >>>> > >>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > >>>> index > >>>> 224c92fa3d65..229c835c23f7 100644 > >>>> --- a/app/test/test_lpm_perf.c > >>>> +++ b/app/test/test_lpm_perf.c > >>>> @@ -67,6 +67,12 @@ enum { > >>>> IP_CLASS_C > >>>> }; > >>>> > >>>> +enum { > >>>> +SINGLE_WRITER = 1, > >>>> +MULTI_WRITER_1, > >>>> +MULTI_WRITER_2 > >>>> +}; > >>> Do we need this? Can we use the number of cores instead? > >>> > >> > >> There are 3 combinations of writes (adds/deletes): > >> 1. Write all the entries - in case of a single writer 2. Write half > >> of the entries - in case of multiple writers 3. Write remaining half > >> of the entries - in case of multiple writers > >> > >> So, I think this is required. > > IMO, this is not scalable. Essentially, we need 2 parameters to divide the > routes among each writer thread. We need 2 parameters, 1) total number of > writers 2) the core ID in the linear space. > > Creating a structure with these 2 and passing that to the writer thread > would be better and scalable. > > Yes, agreed this is only applicable for 2 writers. Currently, the multi writer > test is only limited to a maximum of 2 writers. > To support more number of writers, we need something like this (which I > believe is in lines with your suggestion): > 1. Calculate what each writer will write: single_insert = TOTAL_WRITES / > num_writers 2. Pass core ID in linear space as an argument to the writer > function: pos_core 3. 
Calculate si and ei in the writer function: si = > pos_core * > single_insert; ei = si + single_insert > > I can update the patch to enable more than 2 writers. > Do you also suggest we expand the scope of the test to cover more than > 2 writers? > This will increase the time the test takes to run (which is currently > significant even with 2 writers). Agreed: no to increasing the number of writers, but yes to making the code more generic.
> > > > >> > >>>> + > >>>> /* struct route_rule_count defines the total number of rules in > >>>> following a/b/c > >>>> * each item in a[]/b[]/c[] is the number of common IP address class > >>>> A/B/C, not > >>>> * including the ones for private local network. > >>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { > >> unsigned > >>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id = > >>>> (uint8_t)((uintptr_t)arg); > >>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg); > >>>> uint32_t next_hop_add = 0xAA; > >>>> > >>>> -/* 2 writer threads are used */ > >>>> -if (core_id % 2 == 0) { > >>>> +/* Single writer (writer_id = 1) */ if (writer_id == > >>>> +SINGLE_WRITER) { si = 0; ei = NUM_LDEPTH_ROUTE_ENTRIES; } > >>>> +/* 2 Writers (writer_id = 2/3)*/ > >>>> +else if (writer_id == MULTI_WRITER_1) { > >>>> si = 0; > >>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; > >>>> } else { > >>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = > >>>> 0; i < RCU_ITERATIONS; i++) { > >>>> /* Add all the entries */ > >>>> for (j = si; j < ei; j++) { > >>>> -pthread_mutex_lock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> +pthread_mutex_lock(&lpm_mutex); > >>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >>>> large_ldepth_route_table[j].depth, > >>>> next_hop_add) != 0) { > >>>> printf("Failed to add iteration %d, route# %d\n", i, j); > >>>> -pthread_mutex_unlock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> + > >>>> pthread_mutex_unlock(&lpm_mutex); > >>>> return -1; > >>>> } > >>>> -pthread_mutex_unlock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> +pthread_mutex_unlock(&lpm_mutex); > >>>> } > >>>> > >>>> /* Delete all the entries */ > >>>> for (j = si; j < ei; j++) { > >>>> -pthread_mutex_lock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> +pthread_mutex_lock(&lpm_mutex); > >>>> if (rte_lpm_delete(lpm, > >>>> large_ldepth_route_table[j].ip, > >>>> 
large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete > >>>> iteration %d, route# %d\n", i, j); > >>>> -pthread_mutex_unlock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> + > >>>> pthread_mutex_unlock(&lpm_mutex); > >>>> return -1; > >>>> } > >>>> -pthread_mutex_unlock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> +pthread_mutex_unlock(&lpm_mutex); > >>>> } > >>>> } > >>>> > >>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg) > >>>> > >>>> /* > >>>> * Functional test: > >>>> - * 2 writers, rest are readers > >>>> + * 1/2 writers, rest are readers > >>>> */ > >>>> static int > >>>> -test_lpm_rcu_perf_multi_writer(void) > >>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) > >>>> { > >>>> struct rte_lpm_config config; > >>>> size_t sz; > >>>> -unsigned int i; > >>>> +unsigned int i, j; > >>>> uint16_t core_id; > >>>> struct rte_lpm_rcu_config rcu_cfg = {0}; > >>>> +int (*reader_f)(void *arg) = NULL; > >>>> > >>>> if (rte_lcore_count() < 3) { > >>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at > >>>> least 3\n"); @@ -504,273 +522,76 @@ > >>>> test_lpm_rcu_perf_multi_writer(void) > >>>> num_cores++; > >>>> } > >>>> > >>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration > >>>> enabled\n", -num_cores - 2); > >>>> - > >>>> -/* Create LPM table */ > >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > >> config.number_tbl8s = > >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> -TEST_LPM_ASSERT(lpm != NULL); > >>>> - > >>>> -/* Init RCU variable */ > >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores); > >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); > >>>> - > >>>> -rcu_cfg.v = rv; > >>>> -/* Assign the RCU variable to LPM */ -if > >>>> (rte_lpm_rcu_qsbr_add(lpm, > >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); > >>>> -goto 
error; -} > >>>> - > >>>> -writer_done = 0; > >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >>>> - > >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> - > >>>> -/* Launch reader threads */ > >>>> -for (i = 2; i < num_cores; i++) > >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Launch writer threads */ > >>>> -for (i = 0; i < 2; i++) > >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >>>> -(void *)(uintptr_t)i, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Wait for writer threads */ > >>>> -for (i = 0; i < 2; i++) > >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; > >>>> - > >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: > >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, > >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES); > >>>> - > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -for (i = 2; i < > >>>> num_cores; > >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> - > >>>> -rte_lpm_free(lpm); > >>>> -rte_free(rv); > >>>> -lpm = NULL; > >>>> -rv = NULL; > >>>> - > >>>> -/* Test without RCU integration */ -printf("\nPerf test: 2 > >>>> writers, %d readers, RCU integration disabled\n", -num_cores - 2); > >>>> - > >>>> -/* Create LPM table */ > >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > >> config.number_tbl8s = > >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> -TEST_LPM_ASSERT(lpm != NULL); > >>>> - > >>>> -writer_done = 0; > >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> - > >>>> -/* Launch reader threads */ > >>>> -for (i = 2; i < num_cores; i++) > >>>> -rte_eal_remote_launch(test_lpm_reader, NULL, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Launch writer 
threads */ > >>>> -for (i = 0; i < 2; i++) > >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >>>> -(void *)(uintptr_t)i, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Wait for writer threads */ > >>>> -for (i = 0; i < 2; i++) > >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; > >>>> - > >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: > >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, > >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES); > >>>> - > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -for (i = 2; i < > >>>> num_cores; > >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> - > >>>> -rte_lpm_free(lpm); > >>>> - > >>>> -return 0; > >>>> - > >>>> -error: > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore(); > >>>> - > >>>> -rte_lpm_free(lpm); > >>>> -rte_free(rv); > >>>> - > >>>> -return -1; > >>>> -} > >>>> - > >>>> -/* > >>>> - * Functional test: > >>>> - * Single writer, rest are readers > >>>> - */ > >>>> -static int > >>>> -test_lpm_rcu_perf(void) > >>>> -{ > >>>> -struct rte_lpm_config config; > >>>> -uint64_t begin, total_cycles; > >>>> -size_t sz; > >>>> -unsigned int i, j; > >>>> -uint16_t core_id; > >>>> -uint32_t next_hop_add = 0xAA; > >>>> -struct rte_lpm_rcu_config rcu_cfg = {0}; > >>>> - > >>>> -if (rte_lcore_count() < 2) { > >>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at > >>>> least 2\n"); -return TEST_SKIPPED; -} > >>>> - > >>>> -num_cores = 0; > >>>> -RTE_LCORE_FOREACH_WORKER(core_id) { - > enabled_core_ids[num_cores] = > >>>> core_id; -num_cores++; -} > >>>> - > >>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration > >>>> enabled\n", -num_cores); > >>>> - > >>>> -/* Create LPM table */ > >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > >> config.number_tbl8s = > >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags 
= 0; -lpm = > >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> -TEST_LPM_ASSERT(lpm != NULL); > >>>> - > >>>> -/* Init RCU variable */ > >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores); > >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); > >>>> - > >>>> -rcu_cfg.v = rv; > >>>> -/* Assign the RCU variable to LPM */ -if > >>>> (rte_lpm_rcu_qsbr_add(lpm, > >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); > >>>> -goto error; -} > >>>> - > >>>> -writer_done = 0; > >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> - > >>>> -/* Launch reader threads */ > >>>> -for (i = 0; i < num_cores; i++) > >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Measure add/delete. */ > >>>> -begin = rte_rdtsc_precise(); > >>>> -for (i = 0; i < RCU_ITERATIONS; i++) { > >>>> -/* Add all the entries */ > >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >>>> -large_ldepth_route_table[j].depth, > >>>> -next_hop_add) != 0) { > >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j); > >>>> +for (j = 1; j < 3; j++) { > >>>> +if (use_rcu) > >>>> +printf("\nPerf test: %d writer(s), %d reader(s)," > >>>> + " RCU integration enabled\n", j, num_cores - j); else > >>>> +printf("\nPerf test: %d writer(s), %d reader(s)," > >>>> + " RCU integration disabled\n", j, num_cores - j); > >>>> + > >>>> +/* Create LPM table */ > >>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > >> config.number_tbl8s = > >>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm = > >>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> +TEST_LPM_ASSERT(lpm != NULL); > >>>> + > >>>> +/* Init RCU variable */ > >>>> +if (use_rcu) { > >>>> +sz = rte_rcu_qsbr_get_memsize(num_cores); > >>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >>>> + > >>>> RTE_CACHE_LINE_SIZE); > 
>>>> +rte_rcu_qsbr_init(rv, num_cores); > >>>> + > >>>> +rcu_cfg.v = rv; > >>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm, > >>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n"); > >>>> goto error; > >>>> } > >>>> > >>>> -/* Delete all the entries */ > >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to > >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -} -} > >>>> -total_cycles = rte_rdtsc_precise() - begin; > >>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f = > >>>> +test_lpm_reader; > >>>> > >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g > >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES); > >>>> +writer_done = 0; > >>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >>>> > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -for (i = 0; i < > >>>> num_cores; > >>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> - > >>>> -rte_lpm_free(lpm); > >>>> -rte_free(rv); > >>>> -lpm = NULL; > >>>> -rv = NULL; > >>>> - > >>>> -/* Test without RCU integration */ -printf("\nPerf test: 1 writer, > >>>> %d readers, RCU integration disabled\n", -num_cores); > >>>> - > >>>> -/* Create LPM table */ > >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > >> config.number_tbl8s = > >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> -TEST_LPM_ASSERT(lpm != NULL); > >>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> > >>>> -writer_done = 0; > >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> +/* Launch reader threads */ > >>>> +for (i = j; i < num_cores; i++) > >>>> +rte_eal_remote_launch(reader_f, NULL, enabled_core_ids[i]); > >>>> > >>>> -/* Launch reader 
threads */ > >>>> -for (i = 0; i < num_cores; i++) > >>>> -rte_eal_remote_launch(test_lpm_reader, NULL, > >>>> -enabled_core_ids[i]); > >>>> +/* Launch writer threads */ > >>>> +for (i = 0; i < j; i++) > >>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >>>> +(void *)(uintptr_t)(i + j), > >>> This can be just 'j'? > >>> > >>>> +enabled_core_ids[i]); > >>>> > >>>> -/* Measure add/delete. */ > >>>> -begin = rte_rdtsc_precise(); > >>>> -for (i = 0; i < RCU_ITERATIONS; i++) { > >>>> -/* Add all the entries */ > >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >>>> -large_ldepth_route_table[j].depth, > >>>> -next_hop_add) != 0) { > >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j); > >>>> +/* Wait for writer threads */ > >>>> +for (i = 0; i < j; i++) > >>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > >>>> goto error; > >>>> -} > >>>> > >>>> -/* Delete all the entries */ > >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to > >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -} > >>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES); printf("Total LPM > >>>> +Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: > >>>> +%"PRIu64" cycles\n", __atomic_load_n(&gwrite_cycles, > >>>> __ATOMIC_RELAXED) > >>>> +/ TOTAL_WRITES); > >>>> + > >>>> +writer_done = 1; > >>>> +/* Wait until all readers have exited */ for (i = j; i < > >>>> +num_cores; i++) rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> + > >>>> +rte_lpm_free(lpm); > >>>> +rte_free(rv); > >>>> +lpm = NULL; > >>>> +rv = NULL; > >>>> } > >>>> -total_cycles = rte_rdtsc_precise() - begin; > >>>> - > >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g > >>>> cycles\n", -(double)total_cycles / 
TOTAL_WRITES); > >>>> - > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -for (i = 0; i < > >>>> num_cores; i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> - > >>>> -rte_lpm_free(lpm); > >>>> > >>>> return 0; > >>>> > >>>> @@ -946,9 +767,8 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm); > >>>> rte_lpm_free(lpm); > >>>> > >>>> -test_lpm_rcu_perf(); > >>>> - > >>>> -test_lpm_rcu_perf_multi_writer(); > >>>> +test_lpm_rcu_perf_multi_writer(0); > >>>> +test_lpm_rcu_perf_multi_writer(1); > >>>> > >>>> return 0; > >>>> } > >>>> -- > >>>> 2.17.1 >