> -----Original Message----- > From: Cheng Jiang <cheng1.ji...@intel.com> > Sent: Thursday, June 8, 2023 2:14 PM > To: tho...@monjalon.net; bruce.richard...@intel.com; > m...@smartsharesystems.com; chenbo....@intel.com > Cc: dev@dpdk.org; jiayu...@intel.com; xuan.d...@intel.com; > wenwux...@intel.com; yuanx.w...@intel.com; xingguang...@intel.com; > Cheng Jiang <cheng1.ji...@intel.com> > Subject: [EXT] [PATCH v5] app/dma-perf: introduce dma-perf application > > External Email > > ---------------------------------------------------------------------- > There are many high-performance DMA devices supported in DPDK now, > and these DMA devices can also be integrated into other modules of DPDK as > accelerators, such as Vhost. Before integrating DMA into applications, > developers need to know the performance of these DMA devices in various > scenarios and the performance of CPUs in the same scenario, such as > different buffer lengths. Only in this way can we know the target > performance of the application accelerated by using them. This patch > introduces a high-performance testing tool, which supports comparing the > performance of CPU and DMA in different scenarios automatically with a pre- > set config file. Memory Copy performance test is supported for now. 
> > Signed-off-by: Cheng Jiang <cheng1.ji...@intel.com> > Signed-off-by: Jiayu Hu <jiayu...@intel.com> > Signed-off-by: Yuan Wang <yuanx.w...@intel.com> > Acked-by: Morten Brørup <m...@smartsharesystems.com> > Acked-by: Chenbo Xia <chenbo....@intel.com> > --- > v5: > fixed some LONG_LINE warnings; > v4: > fixed inaccuracy of the memory footprint display; > v3: > fixed some typos; > v2: > added lcore/dmadev designation; > added error case process; > removed worker_threads parameter from config.ini; > improved the logs; > improved config file; > > app/meson.build | 1 + > app/test-dma-perf/benchmark.c | 472 ++++++++++++++++++++++++++++ > app/test-dma-perf/config.ini | 59 ++++ > app/test-dma-perf/main.c | 569 > ++++++++++++++++++++++++++++++++++ > app/test-dma-perf/main.h | 69 +++++ > app/test-dma-perf/meson.build | 17 + > 6 files changed, 1187 insertions(+) > create mode 100644 app/test-dma-perf/benchmark.c create mode 100644 > app/test-dma-perf/config.ini create mode 100644 app/test-dma- > perf/main.c create mode 100644 app/test-dma-perf/main.h create mode > 100644 app/test-dma-perf/meson.build >
<snip> > + > +static inline int > +do_dma_mem_copy(void *p) > +{ > + uint16_t *para_idx = (uint16_t *)p; > + volatile struct lcore_params *para = worker_params[*para_idx]; > + volatile struct worker_info *worker_info = &(para->worker_info); > + uint16_t dev_id = para->dev_id; > + uint32_t nr_buf = para->nr_buf; > + uint16_t kick_batch = para->kick_batch; > + uint32_t buf_size = para->buf_size; > + struct rte_mbuf **srcs = para->srcs; > + struct rte_mbuf **dsts = para->dsts; > + int64_t async_cnt = 0; > + int nr_cpl = 0; > + uint32_t i; > + uint32_t poll_cnt = 0; > + > + worker_info->stop_flag = false; > + worker_info->ready_flag = true; > + > + while (!worker_info->start_flag) > + ; > + > + while (1) { > + for (i = 0; i < nr_buf; i++) { > + if (unlikely(rte_dma_copy(dev_id, > + 0, > + rte_pktmbuf_iova(srcs[i]), > + rte_pktmbuf_iova(dsts[i]), > + buf_size, > + 0) < 0)) { > + rte_dma_submit(dev_id, 0); > + while (rte_dma_burst_capacity(dev_id, 0) == > 0) { > + nr_cpl = rte_dma_completed(dev_id, > 0, MAX_DMA_CPL_NB, > + NULL, NULL); > + async_cnt -= nr_cpl; > + worker_info->total_cpl += nr_cpl; > + } > + if (rte_dma_copy(dev_id, > + 0, > + rte_pktmbuf_iova(srcs[i]), > + rte_pktmbuf_iova(dsts[i]), > + buf_size, > + 0) < 0) { > + printf("enqueue fail again at %u\n", > i); > + printf("space:%d\n", > rte_dma_burst_capacity(dev_id, 0)); > + rte_exit(EXIT_FAILURE, "DMA > enqueue failed\n"); [Amit]: On all success or failure exits, please call rte_dma_stop and rte_dma_close to exit cleanly. 
> + } > + } > + async_cnt++; > + > + if ((async_cnt % kick_batch) == 0) { > + rte_dma_submit(dev_id, 0); > + /* add a poll to avoid ring full */ > + nr_cpl = rte_dma_completed(dev_id, 0, > MAX_DMA_CPL_NB, NULL, NULL); > + async_cnt -= nr_cpl; > + worker_info->total_cpl += nr_cpl; > + } > + } > + > + if (worker_info->stop_flag) > + break; > + } > + > + rte_dma_submit(dev_id, 0); > + while ((async_cnt > 0) && (poll_cnt++ < POLL_MAX)) { > + nr_cpl = rte_dma_completed(dev_id, 0, > MAX_DMA_CPL_NB, NULL, NULL); > + async_cnt -= nr_cpl; > + } > + > + return 0; > +} > + <snip> > + > +void > +mem_copy_benchmark(struct test_configure *cfg, bool is_dma) { > + uint16_t i; > + uint32_t offset; > + unsigned int lcore_id = 0; > + struct rte_mbuf **srcs = NULL, **dsts = NULL; > + struct lcore_dma_map_t *ldm = &cfg->lcore_dma_map; > + unsigned int buf_size = cfg->buf_size.cur; > + uint16_t kick_batch = cfg->kick_batch.cur; > + uint32_t nr_buf = cfg->nr_buf = (cfg->mem_size.cur * 1024 * 1024) / > (cfg->buf_size.cur * 2); > + uint16_t nb_workers = ldm->cnt; > + uint16_t test_secs = cfg->test_secs; > + float memory; > + uint32_t avg_cycles = 0; > + float mops; > + float bandwidth; > + > + if (setup_memory_env(cfg, &srcs, &dsts) < 0) > + goto out; > + > + if (is_dma) > + if (config_dmadevs(cfg) < 0) > + goto out; > + > + if (cfg->cache_flush) { > + cache_flush_buf(srcs, buf_size, nr_buf); > + cache_flush_buf(dsts, buf_size, nr_buf); > + rte_mb(); > + } > + > + printf("Start testing....\n"); > + > + for (i = 0; i < nb_workers; i++) { > + lcore_id = ldm->lcores[i]; > + offset = nr_buf / nb_workers * i; > + > + worker_params[i] = rte_malloc(NULL, sizeof(struct > lcore_params), 0); > + if (!worker_params[i]) { > + printf("lcore parameters malloc failure for lcore > %d\n", lcore_id); > + break; > + } > + if (is_dma) { > + worker_params[i]->dma_name = ldm- > >dma_names[i]; > + worker_params[i]->dev_id = ldm->dma_ids[i]; > + worker_params[i]->kick_batch = kick_batch; > + } > + 
worker_params[i]->worker_id = i; > + worker_params[i]->nr_buf = (uint32_t)(nr_buf / > nb_workers); > + worker_params[i]->buf_size = buf_size; > + worker_params[i]->test_secs = test_secs; > + worker_params[i]->srcs = srcs + offset; > + worker_params[i]->dsts = dsts + offset; > + worker_params[i]->scenario_id = cfg->scenario_id; > + worker_params[i]->lcore_id = lcore_id; > + > + if (is_dma) > + rte_eal_remote_launch(do_dma_mem_copy, (void > *)(&i), lcore_id); > + else > + rte_eal_remote_launch(do_cpu_mem_copy, (void > *)(&i), lcore_id); > + } > + > + while (1) { > + bool ready = true; > + for (i = 0; i < nb_workers; i++) { > + if (worker_params[i]->worker_info.ready_flag == > false) { > + ready = 0; > + break; > + } > + } > + if (ready) > + break; > + } > + > + for (i = 0; i < nb_workers; i++) > + worker_params[i]->worker_info.start_flag = true; > + > + usleep(TEST_WAIT_U_SECOND); > + for (i = 0; i < nb_workers; i++) > + worker_params[i]->worker_info.test_cpl = > +worker_params[i]->worker_info.total_cpl; > + > + usleep(test_secs * 1000 * 1000); > + for (i = 0; i < nb_workers; i++) > + worker_params[i]->worker_info.test_cpl = > worker_params[i]->worker_info.total_cpl - > + worker_params[i]- > >worker_info.test_cpl; > + > + for (i = 0; i < nb_workers; i++) > + worker_params[i]->worker_info.stop_flag = true; > + > + rte_eal_mp_wait_lcore(); > + > + for (i = 0; i < nb_workers; i++) { > + calc_result(buf_size, nr_buf, nb_workers, test_secs, > + worker_params[i]->worker_info.test_cpl, > + &memory, &avg_cycles, &bandwidth, &mops); > + output_result(cfg->scenario_id, worker_params[i]->lcore_id, > + worker_params[i]->dma_name, > avg_cycles, buf_size, > + nr_buf / nb_workers, memory, > bandwidth, mops, is_dma); > + } > + > +out: > + /* free env */ > + if (srcs) > + rte_pktmbuf_free_bulk(srcs, nr_buf); > + if (dsts) > + rte_pktmbuf_free_bulk(dsts, nr_buf); > + > + if (src_pool) > + rte_mempool_free(src_pool); > + if (dst_pool) > + rte_mempool_free(dst_pool); > + > + if (is_dma) 
{ > + for (i = 0; i < nb_workers; i++) { > + printf("Stopping dmadev %d\n", ldm->dma_ids[i]); > + rte_dma_stop(ldm->dma_ids[i]); [Amit]: After rte_dma_stop, please also call rte_dma_close for a clean exit. <snip> > +#endif /* _MAIN_H_ */ > diff --git a/app/test-dma-perf/meson.build b/app/test-dma- > perf/meson.build new file mode 100644 index 0000000000..bd6c264002 > --- /dev/null > +++ b/app/test-dma-perf/meson.build > @@ -0,0 +1,17 @@ > +# SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2019-2023 Intel > +Corporation > + > +# meson file, for building this app as part of a main DPDK build. > + > +if is_windows > + build = false > + reason = 'not supported on Windows' > + subdir_done() > +endif > + > +deps += ['dmadev', 'mbuf', 'cfgfile'] > + > +sources = files( > + 'main.c', > + 'benchmark.c', > +) > -- > 2.40.1