Hi Cheng, Few comments inline. Please check.
Thanks, Dengdui On 2023/6/13 12:31, Cheng Jiang wrote: > There are many high-performance DMA devices supported in DPDK now, and > these DMA devices can also be integrated into other modules of DPDK as > accelerators, such as Vhost. Before integrating DMA into applications, > developers need to know the performance of these DMA devices in various > scenarios and the performance of CPUs in the same scenario, such as > different buffer lengths. Only in this way can we know the target > performance of the application accelerated by using them. This patch > introduces a high-performance testing tool, which supports comparing the > performance of CPU and DMA in different scenarios automatically with a > pre-set config file. Memory Copy performance test are supported for now. > > Signed-off-by: Cheng Jiang <cheng1.ji...@intel.com> > Signed-off-by: Jiayu Hu <jiayu...@intel.com> > Signed-off-by: Yuan Wang <yuanx.w...@intel.com> > Acked-by: Morten Brørup <m...@smartsharesystems.com> > Acked-by: Chenbo Xia <chenbo....@intel.com> > --- > v6: > improved code based on Anoob's comments; > fixed some code structure issues; > v5: > fixed some LONG_LINE warnings; > v4: > fixed inaccuracy of the memory footprint display; > v3: > fixed some typos; > v2: > added lcore/dmadev designation; > added error case process; > removed worker_threads parameter from config.ini; > improved the logs; > improved config file; > > app/meson.build | 1 + > app/test-dma-perf/benchmark.c | 477 ++++++++++++++++++++++++++++ > app/test-dma-perf/config.ini | 59 ++++ > app/test-dma-perf/main.c | 569 ++++++++++++++++++++++++++++++++++ > app/test-dma-perf/main.h | 69 +++++ > app/test-dma-perf/meson.build | 17 + > 6 files changed, 1192 insertions(+) > create mode 100644 app/test-dma-perf/benchmark.c > create mode 100644 app/test-dma-perf/config.ini > create mode 100644 app/test-dma-perf/main.c > create mode 100644 app/test-dma-perf/main.h > create mode 100644 app/test-dma-perf/meson.build > > diff --git a/app/meson.build b/app/meson.build > index 74d2420f67..4fc1a83eba 100644 > --- a/app/meson.build > +++ b/app/meson.build > @@ -19,6 +19,7 @@ apps = [ > 'test-cmdline', > 'test-compress-perf', > 'test-crypto-perf', > + 'test-dma-perf', > 'test-eventdev', > 'test-fib', > 'test-flow-perf', > diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c > new file mode 100644 > index 0000000000..bc1ca82297 > --- /dev/null > +++ b/app/test-dma-perf/benchmark.c > @@ -0,0 +1,477 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2023 Intel Corporation > + */ > + <snip> > +static inline int > +__rte_format_printf(3, 4) > +print_err(const char *func, int lineno, const char *format, ...) > +{ > + va_list ap; > + int ret; > + > + ret = fprintf(stderr, "In %s:%d - ", func, lineno); > + va_start(ap, format); > + ret += vfprintf(stderr, format, ap); > + va_end(ap); > + > + return ret; > +} > + > +static inline void > +calc_result(uint32_t buf_size, uint32_t nr_buf, uint16_t nb_workers, > uint16_t test_secs, > + uint32_t total_cnt, float *memory, uint32_t > *ave_cycle, > + float *bandwidth, float *mops) > +{ > + *memory = (float)(buf_size * (nr_buf / nb_workers) * 2) / (1024 * 1024); > + *ave_cycle = test_secs * rte_get_timer_hz() / total_cnt; > + *bandwidth = (buf_size * 8 * (rte_get_timer_hz() / (float)*ave_cycle)) > / 1000000000; > + *mops = (float)rte_get_timer_hz() / *ave_cycle / 1000000; The value of ave_cycle may be 0. *mops = (float)(total_cnt / test_secs) / 1000000; ? > +} > + > +static void > +output_result(uint8_t scenario_id, uint32_t lcore_id, char *dma_name, > uint64_t ave_cycle, > + uint32_t buf_size, uint32_t nr_buf, float memory, > + float bandwidth, float mops, bool is_dma) > +{ > + if (is_dma) > + printf("lcore %u, DMA %s:\n", lcore_id, dma_name); > + else > + printf("lcore %u\n", lcore_id); > + > + printf("average cycles/op: %" PRIu64 ", buffer size: %u, nr_buf: %u, > memory: %.2lfMB, frequency: %" PRIu64 ".\n", > + ave_cycle, buf_size, nr_buf, memory, > rte_get_timer_hz()); > + printf("Average bandwidth: %.3lfGbps, MOps: %.3lf\n", bandwidth, mops); > + > + if (is_dma) > + snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN, > CSV_LINE_DMA_FMT, > + scenario_id, lcore_id, dma_name, buf_size, > + nr_buf, memory, ave_cycle, bandwidth, mops); > + else > + snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN, > CSV_LINE_CPU_FMT, > + scenario_id, lcore_id, buf_size, > + nr_buf, memory, ave_cycle, bandwidth, mops); > +} > + > +static inline void > +cache_flush_buf(__maybe_unused struct rte_mbuf **array, > + __maybe_unused uint32_t buf_size, > + __maybe_unused uint32_t nr_buf) > +{ > +#ifdef RTE_ARCH_X86_64 > + char *data; > + struct rte_mbuf **srcs = array; > + uint32_t i, offset; > + > + for (i = 0; i < nr_buf; i++) { > + data = rte_pktmbuf_mtod(srcs[i], char *); > + for (offset = 0; offset < buf_size; offset += 64) > + __builtin_ia32_clflush(data + offset); > + } > +#endif > +} > + <snip> > + > +/* Parse the argument given in the command line of the application */ > +static int > +append_eal_args(int argc, char **argv, const char *eal_args, char **new_argv) > +{ > + int i; > + char *tokens[MAX_EAL_PARAM_NB]; > + char args[MAX_EAL_PARAM_LEN] = {0}; > + int token_nb, new_argc = 0; > + > + for (i = 0; i < argc; i++) { > + if ((strcmp(argv[i], CMDLINE_CONFIG_ARG) == 0) || > + (strcmp(argv[i], CMDLINE_RESULT_ARG) == 0)) { > + i++; > + continue; > + } > + strlcpy(new_argv[new_argc], argv[i], > sizeof(new_argv[new_argc])); The type_of argv[new_argc] is *char. Cannot use sizeof(). strcpy(new_argv[new_argc], argv[i]); or strlcpy(new_argv[new_argc], argv[i], MAX_EAL_PARAM_LEN); ? > + new_argc++; > + } > + > + if (eal_args) { > + strlcpy(args, eal_args, sizeof(args)); > + token_nb = rte_strsplit(args, strlen(args), > + tokens, MAX_EAL_PARAM_NB, ' '); > + for (i = 0; i < token_nb; i++) > + strcpy(new_argv[new_argc++], tokens[i]); > + } > + > + return new_argc; > +} > + > +int > +main(int argc, char *argv[]) > +{ > + int ret; > + uint16_t case_nb; > + uint32_t i, nb_lcores; > + pid_t cpid, wpid; > + int wstatus; > + char args[MAX_EAL_PARAM_NB][MAX_EAL_PARAM_LEN]; > + char *pargs[MAX_EAL_PARAM_NB]; > + char *cfg_path_ptr = NULL; > + char *rst_path_ptr = NULL; > + char rst_path[PATH_MAX]; > + int new_argc; > + bool is_first_case = true; > + > + memset(args, 0, sizeof(args)); > + > + for (i = 0; i < RTE_DIM(pargs); i++) > + pargs[i] = args[i]; > + > + for (i = 0; i < (uint32_t)argc; i++) { > + if (strncmp(argv[i], CMDLINE_CONFIG_ARG, MAX_LONG_OPT_SZ) == 0) > + cfg_path_ptr = argv[i + 1]; > + if (strncmp(argv[i], CMDLINE_RESULT_ARG, MAX_LONG_OPT_SZ) == 0) > + rst_path_ptr = argv[i + 1]; > + } > + if (cfg_path_ptr == NULL) { > + printf("Config file not assigned.\n"); > + return -1; > + } > + if (rst_path_ptr == NULL) { > + strlcpy(rst_path, cfg_path_ptr, PATH_MAX); > + strcat(strtok(basename(rst_path), "."), "_result.csv"); > + rst_path_ptr = rst_path; > + } > + > + case_nb = load_configs(cfg_path_ptr); > + fd = fopen(rst_path_ptr, "w"); > + if (fd == NULL) { > + printf("Open output CSV file error.\n"); > + return -1; > + } > + fclose(fd); > + > + for (i = 0; i < case_nb; i++) { > + if (test_cases[i].test_type == TEST_TYPE_NONE) { > + printf("No test type in test case %d.\n\n", i + 1); > + continue; > + } > + if (!test_cases[i].is_valid) { > + printf("Invalid test case %d.\n\n", i + 1); > + continue; > + } > + > + cpid = fork(); > + if (cpid < 0) { > + printf("Fork case %d failed.\n", i + 1); > + exit(EXIT_FAILURE); > + } else if (cpid == 0) { > + printf("\nRunning case %u\n\n", i + 1); > + > + new_argc = append_eal_args(argc, argv, > test_cases[i].eal_args, pargs); > + ret = rte_eal_init(new_argc, pargs); > + if (ret < 0) > + rte_exit(EXIT_FAILURE, "Invalid EAL > arguments\n"); > + > + /* Check lcores. */ > + nb_lcores = rte_lcore_count(); > + if (nb_lcores < 2) > + rte_exit(EXIT_FAILURE, > + "There should be at least 2 worker > lcores.\n"); > + > + fd = fopen(rst_path_ptr, "a"); > + if (!fd) { > + printf("Open output CSV file error.\n"); > + return 0; > + } > + > + if (is_first_case) { > + output_env_info(); > + is_first_case = false; > + } > + run_test(i + 1, &test_cases[i]); > + > + /* clean up the EAL */ > + rte_eal_cleanup(); > + > + fclose(fd); > + > + printf("\nCase %u completed.\n\n", i + 1); > + > + exit(EXIT_SUCCESS); > + } else { > + wpid = waitpid(cpid, &wstatus, 0); > + if (wpid == -1) { > + printf("waitpid error.\n"); > + exit(EXIT_FAILURE); > + } > + > + if (WIFEXITED(wstatus)) > + printf("Case process exited. status %d\n\n", > + WEXITSTATUS(wstatus)); > + else if (WIFSIGNALED(wstatus)) > + printf("Case process killed by signal %d\n\n", > + WTERMSIG(wstatus)); > + else if (WIFSTOPPED(wstatus)) > + printf("Case process stopped by signal %d\n\n", > + WSTOPSIG(wstatus)); > + else if (WIFCONTINUED(wstatus)) > + printf("Case process continued.\n\n"); > + else > + printf("Case process unknown terminated.\n\n"); > + } > + } > + > + printf("Bye...\n"); > + return 0; > +} <snip>