Currently lcore_conf is a global static array. When multiple ports/lcores are distributed across different NUMA nodes, the datapath suffers from a severe cross-NUMA memory access penalty.
On the Kunpeng platform, sizeof(struct lcore_conf) reaches 133760 bytes. Such a huge per-lcore structure significantly amplifies cross-NUMA overhead in multi-port multi-NUMA deployment scenarios. This commit refactors lcore_conf to a pointer array and implements per-lcore NUMA-aware hugepage allocation, allocating each lcore's configuration on its local socket to eliminate remote memory access. Signed-off-by: Chengwen Feng <[email protected]> --- v2: Fix AI review comment: add cleanup when init lcore-conf fails --- examples/l3fwd/l3fwd.h | 2 +- examples/l3fwd/l3fwd_acl.c | 2 +- examples/l3fwd/l3fwd_em.c | 8 +++--- examples/l3fwd/l3fwd_fib.c | 6 ++--- examples/l3fwd/l3fwd_lpm.c | 8 +++--- examples/l3fwd/main.c | 52 +++++++++++++++++++++++++++++++------- 6 files changed, 56 insertions(+), 22 deletions(-) diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index 349fc37c79..20c457ee7f 100644 --- a/examples/l3fwd/l3fwd.h +++ b/examples/l3fwd/l3fwd.h @@ -104,7 +104,7 @@ extern uint32_t hash_entry_number; extern xmm_t val_eth[RTE_MAX_ETHPORTS]; -extern struct lcore_conf lcore_conf[RTE_MAX_LCORE]; +extern struct lcore_conf *lcore_conf[RTE_MAX_LCORE]; extern struct parm_cfg parm_config; diff --git a/examples/l3fwd/l3fwd_acl.c b/examples/l3fwd/l3fwd_acl.c index 4ee3411d2a..0605f6feb9 100644 --- a/examples/l3fwd/l3fwd_acl.c +++ b/examples/l3fwd/l3fwd_acl.c @@ -1090,7 +1090,7 @@ acl_main_loop(__rte_unused void *dummy) prev_tsc = 0; lcore_id = rte_lcore_id(); - qconf = &lcore_conf[lcore_id]; + qconf = lcore_conf[lcore_id]; socketid = rte_lcore_to_socket_id(lcore_id); if (qconf->n_rx_queue == 0) { diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index d8748a0edd..9aa23759d3 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -597,7 +597,7 @@ em_main_loop(__rte_unused void *dummy) US_PER_S * BURST_TX_DRAIN_US; lcore_id = rte_lcore_id(); - qconf = &lcore_conf[lcore_id]; + qconf = lcore_conf[lcore_id]; const uint16_t n_rx_q = 
qconf->n_rx_queue; const uint16_t n_tx_p = qconf->n_tx_port; @@ -685,7 +685,7 @@ em_event_loop_single(struct l3fwd_event_resources *evt_rsrc, return; lcore_id = rte_lcore_id(); - lconf = &lcore_conf[lcore_id]; + lconf = lcore_conf[lcore_id]; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); while (!force_quit) { @@ -747,7 +747,7 @@ em_event_loop_burst(struct l3fwd_event_resources *evt_rsrc, lcore_id = rte_lcore_id(); - lconf = &lcore_conf[lcore_id]; + lconf = lcore_conf[lcore_id]; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); @@ -877,7 +877,7 @@ em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, if (dst_ports == NULL) return; lcore_id = rte_lcore_id(); - lconf = &lcore_conf[lcore_id]; + lconf = lcore_conf[lcore_id]; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); diff --git a/examples/l3fwd/l3fwd_fib.c b/examples/l3fwd/l3fwd_fib.c index 4fc6bf90d5..32c8847130 100644 --- a/examples/l3fwd/l3fwd_fib.c +++ b/examples/l3fwd/l3fwd_fib.c @@ -192,7 +192,7 @@ fib_main_loop(__rte_unused void *dummy) US_PER_S * BURST_TX_DRAIN_US; lcore_id = rte_lcore_id(); - qconf = &lcore_conf[lcore_id]; + qconf = lcore_conf[lcore_id]; const uint16_t n_rx_q = qconf->n_rx_queue; const uint16_t n_tx_p = qconf->n_tx_port; @@ -282,7 +282,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc, lcore_id = rte_lcore_id(); - lconf = &lcore_conf[lcore_id]; + lconf = lcore_conf[lcore_id]; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); @@ -446,7 +446,7 @@ fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr, uint16_t nh; int i; - lconf = &lcore_conf[rte_lcore_id()]; + lconf = lcore_conf[rte_lcore_id()]; /* Reset counters. 
*/ ipv4_cnt = 0; diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index 2d2651e750..77701503f7 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -154,7 +154,7 @@ lpm_main_loop(__rte_unused void *dummy) US_PER_S * BURST_TX_DRAIN_US; lcore_id = rte_lcore_id(); - qconf = &lcore_conf[lcore_id]; + qconf = lcore_conf[lcore_id]; const uint16_t n_rx_q = qconf->n_rx_queue; const uint16_t n_tx_p = qconf->n_tx_port; @@ -270,7 +270,7 @@ lpm_event_loop_single(struct l3fwd_event_resources *evt_rsrc, return; lcore_id = rte_lcore_id(); - lconf = &lcore_conf[lcore_id]; + lconf = lcore_conf[lcore_id]; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); while (!force_quit) { @@ -324,7 +324,7 @@ lpm_event_loop_burst(struct l3fwd_event_resources *evt_rsrc, lcore_id = rte_lcore_id(); - lconf = &lcore_conf[lcore_id]; + lconf = lcore_conf[lcore_id]; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); @@ -468,7 +468,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, return; lcore_id = rte_lcore_id(); - lconf = &lcore_conf[lcore_id]; + lconf = lcore_conf[lcore_id]; dst_port_list = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, RTE_CACHE_LINE_SIZE); diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c index df035b508c..af863407d0 100644 --- a/examples/l3fwd/main.c +++ b/examples/l3fwd/main.c @@ -96,7 +96,7 @@ uint32_t enabled_port_mask; /* Used only in exact match mode. */ bool ipv6_enabled; /**< ipv6 is false by default. 
*/ -struct lcore_conf lcore_conf[RTE_MAX_LCORE]; +struct lcore_conf *lcore_conf[RTE_MAX_LCORE]; struct parm_cfg parm_config; @@ -368,17 +368,17 @@ init_lcore_rx_queues(void) for (i = 0; i < nb_lcore_params; ++i) { lcore = lcore_params[i].lcore_id; - nb_rx_queue = lcore_conf[lcore].n_rx_queue; + nb_rx_queue = lcore_conf[lcore]->n_rx_queue; if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { printf("error: too many queues (%u) for lcore: %u\n", (unsigned int)nb_rx_queue + 1, lcore); return -1; } else { - lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = + lcore_conf[lcore]->rx_queue_list[nb_rx_queue].port_id = lcore_params[i].port_id; - lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = + lcore_conf[lcore]->rx_queue_list[nb_rx_queue].queue_id = lcore_params[i].queue_id; - lcore_conf[lcore].n_rx_queue++; + lcore_conf[lcore]->n_rx_queue++; } } return 0; @@ -1118,6 +1118,36 @@ print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr) printf("%s%s", name, buf); } +static int +init_lcore_conf(void) +{ + unsigned int lcore_id; + int socketid; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + socketid = rte_lcore_to_socket_id(lcore_id); + lcore_conf[lcore_id] = rte_zmalloc_socket(NULL, sizeof(struct lcore_conf), + RTE_CACHE_LINE_SIZE, socketid); + if (lcore_conf[lcore_id] == NULL) + goto cleanup; + } + + return 0; + +cleanup: + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (lcore_conf[lcore_id] != NULL) { + rte_free(lcore_conf[lcore_id]); + lcore_conf[lcore_id] = NULL; + } + } + + return -ENOMEM; +} + int init_mem(uint16_t portid, unsigned int nb_mbuf) { @@ -1189,7 +1219,7 @@ init_mem(uint16_t portid, unsigned int nb_mbuf) } #endif - qconf = &lcore_conf[lcore_id]; + qconf = lcore_conf[lcore_id]; qconf->ipv4_lookup_struct = l3fwd_lkp.get_ipv4_lookup_struct(socketid); qconf->ipv6_lookup_struct = @@ -1492,7 +1522,7 @@ l3fwd_poll_resource_setup(void) 
"rte_eth_tx_queue_setup: err=%d, " "port=%d\n", ret, portid); - qconf = &lcore_conf[lcore_id]; + qconf = lcore_conf[lcore_id]; qconf->tx_queue_id[portid] = queueid; queueid++; @@ -1505,7 +1535,7 @@ l3fwd_poll_resource_setup(void) for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { if (rte_lcore_is_enabled(lcore_id) == 0) continue; - qconf = &lcore_conf[lcore_id]; + qconf = lcore_conf[lcore_id]; printf("\nInitializing rx queues on lcore %u ... ", lcore_id ); fflush(stdout); /* init RX queues */ @@ -1667,6 +1697,10 @@ main(int argc, char **argv) argc -= ret; argv += ret; + ret = init_lcore_conf(); + if (ret) + rte_exit(EXIT_FAILURE, "Init lcore conf failed!\n"); + force_quit = false; signal(SIGINT, signal_handler); signal(SIGTERM, signal_handler); @@ -1749,7 +1783,7 @@ main(int argc, char **argv) for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { if (rte_lcore_is_enabled(lcore_id) == 0) continue; - qconf = &lcore_conf[lcore_id]; + qconf = lcore_conf[lcore_id]; for (queue = 0; queue < qconf->n_rx_queue; ++queue) { portid = qconf->rx_queue_list[queue].port_id; queueid = qconf->rx_queue_list[queue].queue_id; -- 2.17.1

