Hi Bruce, Replies are inline. Really appreciate your comments. I'll fix it in the next version.
Thanks, Cheng > -----Original Message----- > From: Richardson, Bruce <bruce.richard...@intel.com> > Sent: Tuesday, January 17, 2023 11:44 PM > To: Jiang, Cheng1 <cheng1.ji...@intel.com> > Cc: tho...@monjalon.net; m...@smartsharesystems.com; dev@dpdk.org; Hu, > Jiayu <jiayu...@intel.com>; Ding, Xuan <xuan.d...@intel.com>; Ma, WenwuX > <wenwux...@intel.com>; Wang, YuanX <yuanx.w...@intel.com>; He, > Xingguang <xingguang...@intel.com> > Subject: Re: [PATCH v3] app/dma-perf: introduce dma-perf application > > On Tue, Jan 17, 2023 at 12:05:26PM +0000, Cheng Jiang wrote: > > There are many high-performance DMA devices supported in DPDK now, and > > these DMA devices can also be integrated into other modules of DPDK as > > accelerators, such as Vhost. Before integrating DMA into applications, > > developers need to know the performance of these DMA devices in > > various scenarios and the performance of CPUs in the same scenario, > > such as different buffer lengths. Only in this way can we know the > > target performance of the application accelerated by using them. This > > patch introduces a high-performance testing tool, which supports > > comparing the performance of CPU and DMA in different scenarios > > automatically with a pre-set config file. Memory Copy performance test are > supported for now. > > > > Signed-off-by: Cheng Jiang <cheng1.ji...@intel.com> > > Signed-off-by: Jiayu Hu <jiayu...@intel.com> > > Signed-off-by: Yuan Wang <yuanx.w...@intel.com> > > Acked-by: Morten Brørup <m...@smartsharesystems.com> > > Hi, > > more review comments inline below. > > /Bruce > > > --- > > <snip> > > > eal_args=--legacy-mem --file-prefix=test > > Why using legact-mem mode? Rather than these options, just use "--in-memory" > to avoid any conflicts. While this is only an example config, we should steer > people away from legacy memory mode. Ok, got it. I'll fix it. > > > diff --git a/app/test-dma-perf/main.c b/app/test-dma-perf/main.c new > > file mode 100644 index 0000000000..8041f5fdaf > > --- /dev/null > > +++ b/app/test-dma-perf/main.c > > @@ -0,0 +1,434 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright(c) 2022 Intel Corporation */ > > + > > +#include <stdio.h> > > +#if !defined(RTE_EXEC_ENV_LINUX) > > + > > +int > > +main(int argc, char *argv[]) > > +{ > > + printf("OS not supported, skipping test\n"); > > + return 0; > > +} > > + > > +#else > > + > > +#include <stdlib.h> > > +#include <getopt.h> > > +#include <signal.h> > > +#include <stdbool.h> > > +#include <unistd.h> > > +#include <sys/wait.h> > > +#include <inttypes.h> > > + > > +#include <rte_eal.h> > > +#include <rte_cfgfile.h> > > +#include <rte_string_fns.h> > > +#include <rte_lcore.h> > > + > > +#include "main.h" > > +#include "benchmark.h" > > + > > +#define CSV_HDR_FMT "Case %u : %s,lcore,DMA,buffer > size,nr_buf,memory(MB),cycle,bandwidth(Gbps),OPS\n" > > + > > +#define MAX_EAL_PARAM_NB 100 > > +#define MAX_EAL_PARAM_LEN 1024 > > + > > +#define DMA_MEM_COPY "DMA_MEM_COPY" > > +#define CPU_MEM_COPY "CPU_MEM_COPY" > > + > > +#define MAX_PARAMS_PER_ENTRY 4 > > + > > +enum { > > + TEST_TYPE_NONE = 0, > > + TEST_TYPE_DMA_MEM_COPY, > > + TEST_TYPE_CPU_MEM_COPY > > +}; > > + > > +#define MAX_TEST_CASES 16 > > +static struct test_configure test_cases[MAX_TEST_CASES]; > > + > > +char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN]; > > + > > +static FILE *fd; > > + > > +static void > > +output_csv(bool need_blankline) > > +{ > > + uint32_t i; > > + > > + if (need_blankline) { > > + fprintf(fd, "%s", ",,,,,,,,\n"); > > + fprintf(fd, "%s", ",,,,,,,,\n"); > you don't need the "%s" here. The string you are outputting is constant. Sure, sorry about that. > > + } > > + > > + for (i = 0; i < RTE_DIM(output_str); i++) { > > + if (output_str[i][0]) { > > + fprintf(fd, "%s", output_str[i]); > > + memset(output_str[i], 0, MAX_OUTPUT_STR_LEN); > > Rather than zeroing the whole string with memset, would "output_str[i][0] = > '\0';" not work instead? Good point. I'll try it in the next version. > > > + } > > + } > > + > > + fflush(fd); > > +} > > + > > +static void > > +output_env_info(void) > > +{ > > + snprintf(output_str[0], MAX_OUTPUT_STR_LEN, "test environment:\n"); > > + snprintf(output_str[1], MAX_OUTPUT_STR_LEN, "frequency,%" PRIu64 > > +"\n", rte_get_timer_hz()); > > + > > + output_csv(true); > > +} > > + > > +static void > > +output_header(uint32_t case_id, struct test_configure *case_cfg) { > > + snprintf(output_str[0], MAX_OUTPUT_STR_LEN, > > + CSV_HDR_FMT, case_id, case_cfg->test_type_str); > > + > > + output_csv(true); > > +} > > + > > +static void > > +run_test_case(struct test_configure *case_cfg) { > > + switch (case_cfg->test_type) { > > + case TEST_TYPE_DMA_MEM_COPY: > > + dma_mem_copy_benchmark(case_cfg); > > + break; > > + case TEST_TYPE_CPU_MEM_COPY: > > + cpu_mem_copy_benchmark(case_cfg); > > + break; > > + default: > > + printf("Unknown test type. %s\n", case_cfg->test_type_str); > > + break; > > + } > > +} > > + > > +static void > > +run_test(uint32_t case_id, struct test_configure *case_cfg) { > > + uint32_t i; > > + uint32_t nb_lcores = rte_lcore_count(); > > + struct test_configure_entry *mem_size = &case_cfg->mem_size; > > + struct test_configure_entry *buf_size = &case_cfg->buf_size; > > + struct test_configure_entry *ring_size = &case_cfg->ring_size; > > + struct test_configure_entry *kick_batch = &case_cfg->kick_batch; > > + struct test_configure_entry *var_entry = NULL; > > + > > + for (i = 0; i < RTE_DIM(output_str); i++) > > + memset(output_str[i], 0, MAX_OUTPUT_STR_LEN); > > + > > + if (nb_lcores <= case_cfg->nb_workers) { > > + printf("Case %u: Not enough lcores (%u) for all workers > > (%u).\n", > > + case_id, nb_lcores, case_cfg->nb_workers); > > + return; > > + } > > + > > + RTE_LOG(INFO, DMA, "Number of used lcores: %u.\n", nb_lcores); > > + > > + if (mem_size->incr != 0) > > + var_entry = mem_size; > > + > > + if (buf_size->incr != 0) > > + var_entry = buf_size; > > + > > + if (ring_size->incr != 0) > > + var_entry = ring_size; > > + > > + if (kick_batch->incr != 0) > > + var_entry = kick_batch; > > + > > + case_cfg->scenario_id = 0; > > + > > + output_header(case_id, case_cfg); > > + > > + if (var_entry) { > > Things may be a bit simpler if instead of branching here, you initialize > var_entry > to a null var_entry i.e. > > struct test_configure_entry dummy = { 0 }; > struct test_configure_entry *var_entry = &dummy; > > This gives you a single-iteration loop in the case where there is nothing to > vary. I'll consider it, thanks! > > > + for (var_entry->cur = var_entry->first; var_entry->cur <= > var_entry->last;) { > > + case_cfg->scenario_id++; > > + printf("\nRunning scenario %d\n", case_cfg- > >scenario_id); > > + > > + run_test_case(case_cfg); > > + output_csv(false); > > + > > + if (var_entry->op == OP_MUL) > > + var_entry->cur *= var_entry->incr; > > + else > > + var_entry->cur += var_entry->incr; > > + > > + > > + } > > + } else { > > + run_test_case(case_cfg); > > + output_csv(false); > > + } > > +} > > + > > +static int > > +parse_entry(const char *value, struct test_configure_entry *entry) { > > + char input[255] = {0}; > > + char *args[MAX_PARAMS_PER_ENTRY]; > > + int args_nr = -1; > > + > > + strncpy(input, value, 254); > > + if (*input == '\0') > > + goto out; > > + > > + args_nr = rte_strsplit(input, strlen(input), args, > MAX_PARAMS_PER_ENTRY, ','); > > + if (args_nr <= 0) > > + goto out; > > + > > + entry->cur = entry->first = (uint32_t)atoi(args[0]); > > + entry->last = args_nr > 1 ? (uint32_t)atoi(args[1]) : 0; > > + entry->incr = args_nr > 2 ? (uint32_t)atoi(args[2]) : 0; > > + > > + if (args_nr > 3) { > > + if (!strcmp(args[3], "MUL")) > > + entry->op = OP_MUL; > > + else > > + entry->op = OP_ADD; > > This means accepting invalid input. I think you should check the value against > "ADD" so as to reject values like "SUB". You are right, thanks! I'll fix it. > > > + } else > > + entry->op = OP_NONE; > > +out: > > + return args_nr; > > +} > > + > > +static void > > +load_configs(void) > > +{ > > + struct rte_cfgfile *cfgfile; > > + int nb_sections, i; > > + struct test_configure *test_case; > > + char **sections_name; > > + const char *section_name, *case_type; > > + const char *mem_size_str, *buf_size_str, *ring_size_str, > *kick_batch_str; > > + int args_nr, nb_vp; > > + > > + sections_name = malloc(MAX_TEST_CASES * sizeof(char *)); > > + for (i = 0; i < MAX_TEST_CASES; i++) > > + sections_name[i] = malloc(CFG_NAME_LEN * sizeof(char *)); > > + > > I don't think you need to do this work, allocating space for a bunch of > section > names. From the example, it looks like the sections should be called "case1", > "case2" etc., so you can just iterate through those sections, rather than > allowing > sections to have arbitrary names. Good point, I'll try it in the next version. > > > + cfgfile = rte_cfgfile_load("./config.ini", 0); > > + if (!cfgfile) { > > + printf("Open configure file error.\n"); > > + exit(1); > > + } > > Don't hard-code the config file name. This should be taken from a commandline > parameter, so that one can have collections of different test cases. Yes, you are right, I'll fix it. > > > + > > + nb_sections = rte_cfgfile_num_sections(cfgfile, NULL, 0); > > + if (nb_sections > MAX_TEST_CASES) { > > + printf("Error: The maximum number of cases is %d.\n", > MAX_TEST_CASES); > > + exit(1); > > + } > > + rte_cfgfile_sections(cfgfile, sections_name, MAX_TEST_CASES); > > + for (i = 0; i < nb_sections; i++) { > > Iterate through names here, built up dynamically to save memory space. Sure. > > > + test_case = &test_cases[i]; > > + section_name = sections_name[i]; > > + case_type = rte_cfgfile_get_entry(cfgfile, section_name, > "type"); > > + if (!case_type) { > > + printf("Error: No case type in case %d\n.", i + 1); > > + exit(1); > > + } > > + if (!strcmp(case_type, DMA_MEM_COPY)) { > > Coding standard for DPDK requires this to be "strcmp(...) == 0" rather than > using > "!" operator. OK, got it. > > > + test_case->test_type = TEST_TYPE_DMA_MEM_COPY; > > + test_case->test_type_str = DMA_MEM_COPY; > > + } else if (!strcmp(case_type, CPU_MEM_COPY)) { > > + test_case->test_type = TEST_TYPE_CPU_MEM_COPY; > > + test_case->test_type_str = CPU_MEM_COPY; > > + } else { > > + printf("Error: Cannot find case type %s.\n", case_type); > > + exit(1); > > + } > > + > > + nb_vp = 0; > > + > > + test_case->src_numa_node = > (int)atoi(rte_cfgfile_get_entry(cfgfile, > > + section_name, > "src_numa_node")); > > + test_case->dst_numa_node = > (int)atoi(rte_cfgfile_get_entry(cfgfile, > > + section_name, > "dst_numa_node")); > > + > > + mem_size_str = rte_cfgfile_get_entry(cfgfile, section_name, > "mem_size"); > > + args_nr = parse_entry(mem_size_str, &test_case->mem_size); > > + if (args_nr < 0) { > > + printf("parse error\n"); > > + break; > > + } else if (args_nr > 1) > > + nb_vp++; > > + > > + buf_size_str = rte_cfgfile_get_entry(cfgfile, section_name, > "buf_size"); > > + args_nr = parse_entry(buf_size_str, &test_case->buf_size); > > + if (args_nr < 0) { > > + printf("parse error\n"); > > + break; > > + } else if (args_nr > 1) > > + nb_vp++; > > + > > + ring_size_str = rte_cfgfile_get_entry(cfgfile, section_name, > "dma_ring_size"); > > + args_nr = parse_entry(ring_size_str, &test_case->ring_size); > > + if (args_nr < 0) { > > + printf("parse error\n"); > > + break; > > + } else if (args_nr > 1) > > + nb_vp++; > > + > > + kick_batch_str = rte_cfgfile_get_entry(cfgfile, section_name, > "kick_batch"); > > + args_nr = parse_entry(kick_batch_str, &test_case->kick_batch); > > + if (args_nr < 0) { > > + printf("parse error\n"); > > + break; > > + } else if (args_nr > 1) > > + nb_vp++; > > + > > + if (nb_vp > 2) { > > + printf("%s, variable parameters can only have one.\n", > > +section_name); > > Reword to: "Error, each section can only have a single variable parameter" > Also, comparison should be ">= 2" (or "> 1") rather than "> 2", which would > allow 2 as a valid value. Sure, thanks. > > > + break; > > + } > > + > > + test_case->cache_flush = > > + (int)atoi(rte_cfgfile_get_entry(cfgfile, section_name, > "cache_flush")); > > + test_case->repeat_times = > > + (uint32_t)atoi(rte_cfgfile_get_entry(cfgfile, > > + section_name, "repeat_times")); > > + test_case->nb_workers = > > + (uint16_t)atoi(rte_cfgfile_get_entry(cfgfile, > > + section_name, "worker_threads")); > > + test_case->mpool_iter_step = > > + (uint16_t)atoi(rte_cfgfile_get_entry(cfgfile, > > + section_name, "mpool_iter_step")); > > + > > + test_case->eal_args = rte_cfgfile_get_entry(cfgfile, > section_name, "eal_args"); > > + } > > + > > + rte_cfgfile_close(cfgfile); > > + for (i = 0; i < MAX_TEST_CASES; i++) { > > + if (sections_name[i] != NULL) > > + free(sections_name[i]); > > Two points here: > > 1. You don't need to check for NULL before calling "free()". Free just does > nothing if passed a null pointer > > 2. None of these values should be NULL anyway, and you need to check the > return from the malloc call. If you *do* keep the current way of reading > sections > (and I recommend you don't - see my comments above), you need to check that > each malloc succeeds or else the call to "rte_cfgfile_sections" > will try and do a strlcpy to a null pointer. Sure, got it! > > > + } > > + free(sections_name); > > +} > > + > > +/* Parse the argument given in the command line of the application */ > > +static int append_eal_args(int argc, char **argv, const char > > +*eal_args, char **new_argv) { > > + int i; > > + char *tokens[MAX_EAL_PARAM_NB]; > > + char args[MAX_EAL_PARAM_LEN] = {0}; > > + int new_argc, token_nb; > > + > > + new_argc = argc; > > + > > + for (i = 0; i < argc; i++) > > + strcpy(new_argv[i], argv[i]); > > I'm not sure we have a guarantee that new_argv will be big enough, do we? > Better to use strlcpy just in case here. I agree with you, we don't have a guarantee for that. I'll fix it. > > > + > > + if (eal_args) { > > + strcpy(args, eal_args); > > Use strlcpy for safety. Sure. > > > + token_nb = rte_strsplit(args, strlen(args), > > + tokens, MAX_EAL_PARAM_NB, ' '); > > + for (i = 0; i < token_nb; i++) > > + strcpy(new_argv[new_argc++], tokens[i]); > > + } > > + > > + return new_argc; > > +} > > + > > +int > > +main(int argc __maybe_unused, char *argv[] __maybe_unused) { > > + int ret; > > + uint32_t i, nb_lcores; > > + pid_t cpid, wpid; > > + int wstatus; > > + char args[MAX_EAL_PARAM_NB][MAX_EAL_PARAM_LEN]; > > + char *pargs[100]; > > char *pargs[MAX_EAL_PARAM_NB] ?? Sure, sorry about that. > > > + int new_argc; > > + > > + > > + memset(args, 0, sizeof(args)); > > + for (i = 0; i < 100; i++) > > RTE_DIM(pargs) Sure. > > > + pargs[i] = args[i]; > > + > > + load_configs(); > > + fd = fopen("./test_result.csv", "w"); > > Like the config file, the result output file should be configurable. > Perhaps it should be based off the config file name? > > test1.ini => test1_result.csv > config.ini => config_result.csv Good point, thanks! > > > + if (!fd) { > > + printf("Open output CSV file error.\n"); > > + return 0; > > + } > > + fclose(fd); > > + > > + /* loop each case, run it */ > > + for (i = 0; i < MAX_TEST_CASES; i++) { > > + if (test_cases[i].test_type != TEST_TYPE_NONE) { > > Flip this condition to reduce indentation: > > if (test_cases[i].test_type == TEST_TYPE_NONE) > continue; Sure. > > > + cpid = fork(); > > + if (cpid < 0) { > > + printf("Fork case %d failed.\n", i + 1); > > + exit(EXIT_FAILURE); > > + } else if (cpid == 0) { > > + printf("\nRunning case %u\n", i + 1); > > + > > + if (test_cases[i].eal_args) { > > + new_argc = append_eal_args(argc, > argv, > > + test_cases[i].eal_args, pargs); > > + > > + ret = rte_eal_init(new_argc, pargs); > > + } else { > > You don't need this if-else here. The append_eal_args function handles a NULL > parameter, so unconditionally call append_eal_args and then eal_init(new_argc, > pargs). We won't notice the different in init time, but the code would be > clearer. OK, got it. > > > + ret = rte_eal_init(argc, argv); > > + } > > + if (ret < 0) > > + rte_exit(EXIT_FAILURE, "Invalid EAL > arguments\n"); > > + > > + /* Check lcores. */ > > + nb_lcores = rte_lcore_count(); > > + if (nb_lcores < 2) > > + rte_exit(EXIT_FAILURE, > > + "There should be at least 2 > worker lcores.\n"); > > + > > + fd = fopen("./test_result.csv", "a"); > > + if (!fd) { > > + printf("Open output CSV file error.\n"); > > + return 0; > > + } > > + > > + if (i == 0) > > + output_env_info(); > > Beware that you have a condition above to skip any test cases where "test_type > == TEST_TYPE_NONE". Therefore, if the first test is of type NONE, the env_info > will never be printed. I'll fix it in the next version. > > > + run_test(i + 1, &test_cases[i]); > > + > > + /* clean up the EAL */ > > + rte_eal_cleanup(); > > + > > + fclose(fd); > > + > > + printf("\nCase %u completed.\n", i + 1); > > + > > + exit(EXIT_SUCCESS); > > + } else { > > + wpid = waitpid(cpid, &wstatus, 0); > > + if (wpid == -1) { > > + printf("waitpid error.\n"); > > + exit(EXIT_FAILURE); > > + } > > + > > + if (WIFEXITED(wstatus)) > > + printf("Case process exited. > status %d\n", > > + WEXITSTATUS(wstatus)); > > + else if (WIFSIGNALED(wstatus)) > > + printf("Case process killed by > signal %d\n", > > + WTERMSIG(wstatus)); > > + else if (WIFSTOPPED(wstatus)) > > + printf("Case process stopped by > signal %d\n", > > + WSTOPSIG(wstatus)); > > + else if (WIFCONTINUED(wstatus)) > > + printf("Case process continued.\n"); > > + else > > + printf("Case process unknown > terminated.\n"); > > + } > > + } > > + } > > + > > + printf("Bye...\n"); > > + return 0; > > +} > > + > > +#endif > > diff --git a/app/test-dma-perf/main.h b/app/test-dma-perf/main.h new > > file mode 100644 index 0000000000..a8fcf4f34d > > --- /dev/null > > +++ b/app/test-dma-perf/main.h > > @@ -0,0 +1,57 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright(c) 2022 Intel Corporation */ > > + > > +#ifndef _MAIN_H_ > > +#define _MAIN_H_ > > + > > + > > +#include <rte_common.h> > > +#include <rte_cycles.h> > > + > > +#ifndef __maybe_unused > > +#define __maybe_unused __rte_unused > > +#endif > > + > > +#define MAX_WORKER_NB 128 > > +#define MAX_OUTPUT_STR_LEN 512 > > + > > +#define RTE_LOGTYPE_DMA RTE_LOGTYPE_USER1 > > + > > While there are a number of RTE_LOG calls in the app, I also see a number of > regular printfs for output. Again, please standardize on one output - if > using just > printf, you can drop this line. Sure, I'll fix it. > > > +extern char output_str[MAX_WORKER_NB][MAX_OUTPUT_STR_LEN]; > > + > > +typedef enum { > > + OP_NONE = 0, > > + OP_ADD, > > + OP_MUL > > +} alg_op_type; > > + > > +struct test_configure_entry { > > + uint32_t first; > > + uint32_t last; > > + uint32_t incr; > > + alg_op_type op; > > + uint32_t cur; > > +}; > > + > > +struct test_configure { > > + uint8_t test_type; > > + const char *test_type_str; > > + uint16_t src_numa_node; > > + uint16_t dst_numa_node; > > + uint16_t opcode; > > + bool is_dma; > > + struct test_configure_entry mem_size; > > + struct test_configure_entry buf_size; > > + struct test_configure_entry ring_size; > > + struct test_configure_entry kick_batch; > > + uint32_t cache_flush; > > + uint32_t nr_buf; > > + uint32_t repeat_times; > > + uint32_t nb_workers; > > + uint16_t mpool_iter_step; > > + const char *eal_args; > > + uint8_t scenario_id; > > +}; > > + > > +#endif /* _MAIN_H_ */ > > diff --git a/app/test-dma-perf/meson.build > > b/app/test-dma-perf/meson.build new file mode 100644 index > > 0000000000..001f67f6c1 > > --- /dev/null > > +++ b/app/test-dma-perf/meson.build > > @@ -0,0 +1,20 @@ > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2019-2022 > > +Intel Corporation > > 2023 now Got it. > > > + > > +# meson file, for building this example as part of a main DPDK build. > > +# > > +# To build this example as a standalone application with an > > +already-installed # DPDK instance, use 'make' > > Drop this comment. The test apps in "app" folder are for building as part of > DPDK only, there is no makefile to use. Sure. > > > + > > +if is_windows > > + build = false > > + reason = 'not supported on Windows' > > + subdir_done() > > +endif > > + > > +deps += ['dmadev', 'mbuf', 'cfgfile'] > > + > > +sources = files( > > + 'main.c', > > + 'benchmark.c', > > +) > > -- > > 2.35.1 > >