Hi,

> 
> Reserve a per-lcore 4MB memzone and allocate thread stack of EAL threads 
> there for better NUMA locality of stack-allocated variables

I wonder if there is any real performance improvement seen with that change?
Is there any case (existing DPDK app/example) that can demonstrate it?
Konstantin

> 
> Signed-off-by: Christos Ricudis <ricu...@niometrics.com>
> ---
>  lib/librte_eal/linux/eal.c | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
> 
> diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c
> index 9530ee5..e047107 100644
> --- a/lib/librte_eal/linux/eal.c
> +++ b/lib/librte_eal/linux/eal.c
> @@ -68,6 +68,8 @@
> 
>  #define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups"
> 
> +#define THREAD_STACK_SIZE_DEFAULT (4ULL * 1024ULL * 1024ULL)
> +#include <rte_memzone.h>
>  /* Allow the application to print its usage message too if set */
>  static rte_usage_hook_t      rte_application_usage_hook = NULL;
> 
> @@ -1224,6 +1226,24 @@ static void rte_eal_init_alert(const char *msg)
> 
>               lcore_config[i].state = WAIT;
> 
> +             pthread_attr_t attr;
> +             pthread_attr_init(&attr);
> +             size_t thread_stack_size = THREAD_STACK_SIZE_DEFAULT;
> +             char thread_stack_name[64];
> +             snprintf(thread_stack_name, sizeof thread_stack_name, 
> "rte:lcore:%s:%d:threadstack", rte_eal_process_type() ==
> RTE_PROC_PRIMARY ? "p" : "s", i);
> +             const struct rte_memzone *mz = 
> rte_memzone_lookup(thread_stack_name);
> +             if (mz == NULL) {
> +                     if ((mz = rte_memzone_reserve(thread_stack_name, 
> thread_stack_size, lcore_config[i].socket_id, 0)) == NULL) {
> +                             rte_panic("Cannot allocate memzone for thread 
> stack");
> +                     }
> +             }
> +             void *thread_stack = mz->addr;
> +
> +             if (pthread_attr_setstack(&attr, thread_stack, 
> thread_stack_size) < 0) {
> +                     rte_panic("Cannot set thread stack\n");
> +             }
> +             RTE_LOG(DEBUG, EAL, "Thread stack for lcore %d on socket %d set 
> to %p\n", i, lcore_config[i].socket_id, thread_stack);
> +
>               /* create a thread for each lcore */
>               ret = pthread_create(&lcore_config[i].thread_id, NULL,
>                                    eal_thread_loop, NULL);
> --
> 1.8.3.1

Reply via email to