Hi, Thanks for your comments, replies are inline.
Thanks a again, Cheng > -----Original Message----- > From: huangdengdui <huangdeng...@huawei.com> > Sent: Tuesday, June 13, 2023 8:55 PM > To: Jiang, Cheng1 <cheng1.ji...@intel.com>; tho...@monjalon.net; > Richardson, Bruce <bruce.richard...@intel.com>; > m...@smartsharesystems.com; Xia, Chenbo <chenbo....@intel.com>; > amitpraka...@marvell.com; ano...@marvell.com > Cc: dev@dpdk.org; Hu, Jiayu <jiayu...@intel.com>; Ding, Xuan > <xuan.d...@intel.com>; Ma, WenwuX <wenwux...@intel.com>; Wang, > YuanX <yuanx.w...@intel.com>; He, Xingguang <xingguang...@intel.com> > Subject: Re: [PATCH v6] app/dma-perf: introduce dma-perf application > > Hi Cheng, > > Few comments inline. Please check. > > Thanks, > Dengdui > > On 2023/6/13 12:31, Cheng Jiang wrote: > > There are many high-performance DMA devices supported in DPDK now, > and > > these DMA devices can also be integrated into other modules of DPDK as > > accelerators, such as Vhost. Before integrating DMA into applications, > > developers need to know the performance of these DMA devices in > > various scenarios and the performance of CPUs in the same scenario, > > such as different buffer lengths. Only in this way can we know the > > target performance of the application accelerated by using them. This > > patch introduces a high-performance testing tool, which supports > > comparing the performance of CPU and DMA in different scenarios > > automatically with a pre-set config file. Memory Copy performance test are > supported for now. > > > > Signed-off-by: Cheng Jiang <cheng1.ji...@intel.com> > > Signed-off-by: Jiayu Hu <jiayu...@intel.com> > > Signed-off-by: Yuan Wang <yuanx.w...@intel.com> > > Acked-by: Morten Brørup <m...@smartsharesystems.com> > > Acked-by: Chenbo Xia <chenbo....@intel.com> > > --- > > v6: > > improved code based on Anoob's comments; > > fixed some code structure issues; > > v5: > > fixed some LONG_LINE warnings; > > v4: > > fixed inaccuracy of the memory footprint display; > > v3: > > fixed some typos; > > v2: > > added lcore/dmadev designation; > > added error case process; > > removed worker_threads parameter from config.ini; > > improved the logs; > > improved config file; > > > > app/meson.build | 1 + > > app/test-dma-perf/benchmark.c | 477 ++++++++++++++++++++++++++++ > > app/test-dma-perf/config.ini | 59 ++++ > > app/test-dma-perf/main.c | 569 > ++++++++++++++++++++++++++++++++++ > > app/test-dma-perf/main.h | 69 +++++ > > app/test-dma-perf/meson.build | 17 + > > 6 files changed, 1192 insertions(+) > > create mode 100644 app/test-dma-perf/benchmark.c create mode > 100644 > > app/test-dma-perf/config.ini create mode 100644 > > app/test-dma-perf/main.c create mode 100644 app/test-dma-perf/main.h > > create mode 100644 app/test-dma-perf/meson.build > > > > diff --git a/app/meson.build b/app/meson.build index > > 74d2420f67..4fc1a83eba 100644 > > --- a/app/meson.build > > +++ b/app/meson.build > > @@ -19,6 +19,7 @@ apps = [ > > 'test-cmdline', > > 'test-compress-perf', > > 'test-crypto-perf', > > + 'test-dma-perf', > > 'test-eventdev', > > 'test-fib', > > 'test-flow-perf', > > diff --git a/app/test-dma-perf/benchmark.c > > b/app/test-dma-perf/benchmark.c new file mode 100644 index > > 0000000000..bc1ca82297 > > --- /dev/null > > +++ b/app/test-dma-perf/benchmark.c > > @@ -0,0 +1,477 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright(c) 2023 Intel Corporation */ > > + > > <snip> > > > +static inline int > > +__rte_format_printf(3, 4) > > +print_err(const char *func, int lineno, const char *format, ...) { > > + va_list ap; > > + int ret; > > + > > + ret = fprintf(stderr, "In %s:%d - ", func, lineno); > > + va_start(ap, format); > > + ret += vfprintf(stderr, format, ap); > > + va_end(ap); > > + > > + return ret; > > +} > > + > > +static inline void > > +calc_result(uint32_t buf_size, uint32_t nr_buf, uint16_t nb_workers, > uint16_t test_secs, > > + uint32_t total_cnt, float *memory, uint32_t > *ave_cycle, > > + float *bandwidth, float *mops) > > +{ > > + *memory = (float)(buf_size * (nr_buf / nb_workers) * 2) / (1024 * > 1024); > > + *ave_cycle = test_secs * rte_get_timer_hz() / total_cnt; > > + *bandwidth = (buf_size * 8 * (rte_get_timer_hz() / (float)*ave_cycle)) > / 1000000000; > > + *mops = (float)rte_get_timer_hz() / *ave_cycle / 1000000; > > The value of ave_cycle may be 0. > > *mops = (float)(total_cnt / test_secs) / 1000000; ? OK, it makes sense to me. I'll fix it in the next version. > > > +} > > + > > +static void > > +output_result(uint8_t scenario_id, uint32_t lcore_id, char *dma_name, > uint64_t ave_cycle, > > + uint32_t buf_size, uint32_t nr_buf, float memory, > > + float bandwidth, float mops, bool is_dma) { > > + if (is_dma) > > + printf("lcore %u, DMA %s:\n", lcore_id, dma_name); > > + else > > + printf("lcore %u\n", lcore_id); > > + > > + printf("average cycles/op: %" PRIu64 ", buffer size: %u, nr_buf: %u, > memory: %.2lfMB, frequency: %" PRIu64 ".\n", > > + ave_cycle, buf_size, nr_buf, memory, > rte_get_timer_hz()); > > + printf("Average bandwidth: %.3lfGbps, MOps: %.3lf\n", bandwidth, > > +mops); > > + > > + if (is_dma) > > + snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN, > CSV_LINE_DMA_FMT, > > + scenario_id, lcore_id, dma_name, buf_size, > > + nr_buf, memory, ave_cycle, bandwidth, mops); > > + else > > + snprintf(output_str[lcore_id], MAX_OUTPUT_STR_LEN, > CSV_LINE_CPU_FMT, > > + scenario_id, lcore_id, buf_size, > > + nr_buf, memory, ave_cycle, bandwidth, mops); } > > + > > +static inline void > > +cache_flush_buf(__maybe_unused struct rte_mbuf **array, > > + __maybe_unused uint32_t buf_size, > > + __maybe_unused uint32_t nr_buf) > > +{ > > +#ifdef RTE_ARCH_X86_64 > > + char *data; > > + struct rte_mbuf **srcs = array; > > + uint32_t i, offset; > > + > > + for (i = 0; i < nr_buf; i++) { > > + data = rte_pktmbuf_mtod(srcs[i], char *); > > + for (offset = 0; offset < buf_size; offset += 64) > > + __builtin_ia32_clflush(data + offset); > > + } > > +#endif > > +} > > + > > <snip> > > > + > > +/* Parse the argument given in the command line of the application */ > > +static int append_eal_args(int argc, char **argv, const char > > +*eal_args, char **new_argv) { > > + int i; > > + char *tokens[MAX_EAL_PARAM_NB]; > > + char args[MAX_EAL_PARAM_LEN] = {0}; > > + int token_nb, new_argc = 0; > > + > > + for (i = 0; i < argc; i++) { > > + if ((strcmp(argv[i], CMDLINE_CONFIG_ARG) == 0) || > > + (strcmp(argv[i], CMDLINE_RESULT_ARG) == 0)) > { > > + i++; > > + continue; > > + } > > + strlcpy(new_argv[new_argc], argv[i], > sizeof(new_argv[new_argc])); > > The type_of argv[new_argc] is *char. Cannot use sizeof(). > > strcpy(new_argv[new_argc], argv[i]); or strlcpy(new_argv[new_argc], argv[i], > MAX_EAL_PARAM_LEN); ? Yes, it's a mistake, thanks for pointing out! I'll fix it in the next version. > > > + new_argc++; > > + } > > + > > + if (eal_args) { > > + strlcpy(args, eal_args, sizeof(args)); > > + token_nb = rte_strsplit(args, strlen(args), > > + tokens, MAX_EAL_PARAM_NB, ' '); > > + for (i = 0; i < token_nb; i++) > > + strcpy(new_argv[new_argc++], tokens[i]); > > + } > > + > > + return new_argc; > > +} > > + > > +int > > +main(int argc, char *argv[]) > > +{ > > + int ret; > > + uint16_t case_nb; > > + uint32_t i, nb_lcores; > > + pid_t cpid, wpid; > > + int wstatus; > > + char args[MAX_EAL_PARAM_NB][MAX_EAL_PARAM_LEN]; > > + char *pargs[MAX_EAL_PARAM_NB]; > > + char *cfg_path_ptr = NULL; > > + char *rst_path_ptr = NULL; > > + char rst_path[PATH_MAX]; > > + int new_argc; > > + bool is_first_case = true; > > + > > + memset(args, 0, sizeof(args)); > > + > > + for (i = 0; i < RTE_DIM(pargs); i++) > > + pargs[i] = args[i]; > > + > > + for (i = 0; i < (uint32_t)argc; i++) { > > + if (strncmp(argv[i], CMDLINE_CONFIG_ARG, > MAX_LONG_OPT_SZ) == 0) > > + cfg_path_ptr = argv[i + 1]; > > + if (strncmp(argv[i], CMDLINE_RESULT_ARG, > MAX_LONG_OPT_SZ) == 0) > > + rst_path_ptr = argv[i + 1]; > > + } > > + if (cfg_path_ptr == NULL) { > > + printf("Config file not assigned.\n"); > > + return -1; > > + } > > + if (rst_path_ptr == NULL) { > > + strlcpy(rst_path, cfg_path_ptr, PATH_MAX); > > + strcat(strtok(basename(rst_path), "."), "_result.csv"); > > + rst_path_ptr = rst_path; > > + } > > + > > + case_nb = load_configs(cfg_path_ptr); > > + fd = fopen(rst_path_ptr, "w"); > > + if (fd == NULL) { > > + printf("Open output CSV file error.\n"); > > + return -1; > > + } > > + fclose(fd); > > + > > + for (i = 0; i < case_nb; i++) { > > + if (test_cases[i].test_type == TEST_TYPE_NONE) { > > + printf("No test type in test case %d.\n\n", i + 1); > > + continue; > > + } > > + if (!test_cases[i].is_valid) { > > + printf("Invalid test case %d.\n\n", i + 1); > > + continue; > > + } > > + > > + cpid = fork(); > > + if (cpid < 0) { > > + printf("Fork case %d failed.\n", i + 1); > > + exit(EXIT_FAILURE); > > + } else if (cpid == 0) { > > + printf("\nRunning case %u\n\n", i + 1); > > + > > + new_argc = append_eal_args(argc, argv, > test_cases[i].eal_args, pargs); > > + ret = rte_eal_init(new_argc, pargs); > > + if (ret < 0) > > + rte_exit(EXIT_FAILURE, "Invalid EAL > arguments\n"); > > + > > + /* Check lcores. */ > > + nb_lcores = rte_lcore_count(); > > + if (nb_lcores < 2) > > + rte_exit(EXIT_FAILURE, > > + "There should be at least 2 worker > lcores.\n"); > > + > > + fd = fopen(rst_path_ptr, "a"); > > + if (!fd) { > > + printf("Open output CSV file error.\n"); > > + return 0; > > + } > > + > > + if (is_first_case) { > > + output_env_info(); > > + is_first_case = false; > > + } > > + run_test(i + 1, &test_cases[i]); > > + > > + /* clean up the EAL */ > > + rte_eal_cleanup(); > > + > > + fclose(fd); > > + > > + printf("\nCase %u completed.\n\n", i + 1); > > + > > + exit(EXIT_SUCCESS); > > + } else { > > + wpid = waitpid(cpid, &wstatus, 0); > > + if (wpid == -1) { > > + printf("waitpid error.\n"); > > + exit(EXIT_FAILURE); > > + } > > + > > + if (WIFEXITED(wstatus)) > > + printf("Case process exited. status %d\n\n", > > + WEXITSTATUS(wstatus)); > > + else if (WIFSIGNALED(wstatus)) > > + printf("Case process killed by signal %d\n\n", > > + WTERMSIG(wstatus)); > > + else if (WIFSTOPPED(wstatus)) > > + printf("Case process stopped by > signal %d\n\n", > > + WSTOPSIG(wstatus)); > > + else if (WIFCONTINUED(wstatus)) > > + printf("Case process continued.\n\n"); > > + else > > + printf("Case process unknown > terminated.\n\n"); > > + } > > + } > > + > > + printf("Bye...\n"); > > + return 0; > > +} > > <snip>