Hi,
On 03/11/2020 14:03, Dharmik Thakkar wrote:
On Nov 2, 2020, at 11:32 PM, Honnappa Nagarahalli
<honnappa.nagaraha...@arm.com> wrote:
<snip>
Avoid code duplication by combining single and multi threaded tests
Signed-off-by: Dharmik Thakkar <dharmik.thak...@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
---
 app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
 1 file changed, 91 insertions(+), 271 deletions(-)

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 224c92fa3d65..229c835c23f7 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -67,6 +67,12 @@ enum {
 	IP_CLASS_C
 };
 
+enum {
+	SINGLE_WRITER = 1,
+	MULTI_WRITER_1,
+	MULTI_WRITER_2
+};
Do we need this? Can we use the number of cores instead?
There are 3 combinations of writes (adds/deletes):
1. Write all the entries - in case of a single writer
2. Write half of the entries - in case of multiple writers
3. Write the remaining half of the entries - in case of multiple writers
So, I think this is required.
IMO, this is not scalable. Essentially, we need 2 parameters to divide the
routes among the writer threads: 1) the total number of writers, and 2) the
core ID in the linear space. Creating a structure with these 2 fields and
passing it to the writer thread would be better and more scalable.
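(For illustration only, a minimal sketch of such a parameter structure; the
struct and field names below are hypothetical, not part of the patch:)

	/* Hypothetical per-writer parameters, one instance per writer thread */
	struct writer_params {
		uint8_t num_writers;	/* total number of writer threads */
		uint8_t pos_core;	/* this writer's index in linear space,
					 * 0..num_writers-1 */
	};

Each writer thread would receive one such struct via rte_eal_remote_launch()
and derive its slice of the routes from the two fields, so no writer-id enum
would be needed.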
Yes, agreed, this is only applicable for 2 writers. Currently, the multi
writer test is limited to a maximum of 2 writers.
To support more writers, we need something like this (which I believe is in
line with your suggestion):
1. Calculate what each writer will write: single_insert = TOTAL_WRITES / num_writers
2. Pass the core ID in linear space as an argument to the writer function: pos_core
3. Calculate si and ei in the writer function: si = pos_core * single_insert;
   ei = si + single_insert
(See the sketch below.)
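(A minimal sketch of those three steps, assuming the hypothetical
writer_params struct above, and dividing the per-iteration route count --
which is what si/ei actually index -- rather than TOTAL_WRITES:)

	/* Hypothetical writer body; only the slice computation is shown */
	static int
	test_lpm_rcu_qsbr_writer(void *arg)
	{
		struct writer_params *p = (struct writer_params *)arg;
		/* Step 1: routes handled by each writer per iteration */
		uint32_t single_insert = NUM_LDEPTH_ROUTE_ENTRIES / p->num_writers;
		/* Steps 2-3: this writer's slice, from its position in
		 * linear space */
		uint32_t si = p->pos_core * single_insert;
		uint32_t ei = si + single_insert;

		/* ... add/delete large_ldepth_route_table[si..ei) as in the
		 * patch, taking lpm_mutex only when p->num_writers > 1 ...
		 */
		return 0;
	}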
I agree with Honnappa's suggestion; it looks good to me, better than the
previous implementation.
I can update the patch to enable more than 2 writers.
Do you also suggest we expand the scope of the test to run with more than 2
writers? This will increase the test's running time (which is already
significant even with 2 writers).
I don't see any reason to increase the number of writers to more than 2.
+
 /* struct route_rule_count defines the total number of rules in following a/b/c
  * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, not
  * including the ones for private local network.
@@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
+	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
+	/* Single writer (writer_id = 1) */
+	if (writer_id == SINGLE_WRITER) {
+		si = 0;
+		ei = NUM_LDEPTH_ROUTE_ENTRIES;
+	}
+	/* 2 Writers (writer_id = 2/3) */
+	else if (writer_id == MULTI_WRITER_1) {
 		si = 0;
 		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
 	} else {
@@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n", i, j);
-				pthread_mutex_unlock(&lpm_mutex);
+				if (writer_id != SINGLE_WRITER)
+					pthread_mutex_unlock(&lpm_mutex);
 				return -1;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n", i, j);
-				pthread_mutex_unlock(&lpm_mutex);
+				if (writer_id != SINGLE_WRITER)
+					pthread_mutex_unlock(&lpm_mutex);
 				return -1;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
@@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -504,273 +522,76 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
-printf("\nPerf test: 2 writers, %d readers, RCU integration
enabled\n", -num_cores - 2);
-
-/* Create LPM table */
-config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
config.number_tbl8s =
NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-TEST_LPM_ASSERT(lpm != NULL);
-
-/* Init RCU variable */
-sz = rte_rcu_qsbr_get_memsize(num_cores);
-rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
-
-rcu_cfg.v = rv;
-/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
&rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
error; -}
-
-writer_done = 0;
-__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-/* Launch reader threads */
-for (i = 2; i < num_cores; i++)
-rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-enabled_core_ids[i]);
-
-/* Launch writer threads */
-for (i = 0; i < 2; i++)
-rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-(void *)(uintptr_t)i,
-enabled_core_ids[i]);
-
-/* Wait for writer threads */
-for (i = 0; i < 2; i++)
-if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
-
-printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
%"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
__ATOMIC_RELAXED) -/ TOTAL_WRITES);
-
-writer_done = 1;
-/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
-
-rte_lpm_free(lpm);
-rte_free(rv);
-lpm = NULL;
-rv = NULL;
-
-/* Test without RCU integration */
-printf("\nPerf test: 2 writers, %d readers, RCU integration
disabled\n", -num_cores - 2);
-
-/* Create LPM table */
-config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
config.number_tbl8s =
NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-TEST_LPM_ASSERT(lpm != NULL);
-
-writer_done = 0;
-__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-/* Launch reader threads */
-for (i = 2; i < num_cores; i++)
-rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
-
-/* Launch writer threads */
-for (i = 0; i < 2; i++)
-rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-(void *)(uintptr_t)i,
-enabled_core_ids[i]);
-
-/* Wait for writer threads */
-for (i = 0; i < 2; i++)
-if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
-
-printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
%"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
__ATOMIC_RELAXED) -/ TOTAL_WRITES);
-
-writer_done = 1;
-/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
-
-rte_lpm_free(lpm);
-
-return 0;
-
-error:
-writer_done = 1;
-/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
-
-rte_lpm_free(lpm);
-rte_free(rv);
-
-return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-			RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-				enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+				" RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+				" RCU integration disabled\n", j, num_cores - j);
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+					RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
-printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
cycles\n", -(double)total_cycles / TOTAL_WRITES);
+writer_done = 0;
+__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+					enabled_core_ids[i]);
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-				enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+					(void *)(uintptr_t)(i + j),

This can be just 'j'?

+					enabled_core_ids[i]);
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
@@ -946,9 +767,8 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
-
-	test_lpm_rcu_perf_multi_writer();
+	test_lpm_rcu_perf_multi_writer(0);
+	test_lpm_rcu_perf_multi_writer(1);
 
 	return 0;
 }
--
2.17.1
--
Regards,
Vladimir