> -----Original Message-----
> From: Elena Agostini <eagost...@nvidia.com>
> Sent: Tuesday, September 7, 2021 00:11
> To: Jerin Jacob <jerinjac...@gmail.com>
> Cc: Wang, Haiyue <haiyue.w...@intel.com>; NBU-Contact-Thomas Monjalon <tho...@monjalon.net>;
> Jerin Jacob <jer...@marvell.com>; dpdk-dev <dev@dpdk.org>; Stephen Hemminger <step...@networkplumber.org>;
> David Marchand <david.march...@redhat.com>; Andrew Rybchenko <andrew.rybche...@oktetlabs.ru>;
> Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>; Yigit, Ferruh <ferruh.yi...@intel.com>; techbo...@dpdk.org
> Subject: RE: [dpdk-dev] [RFC PATCH v2 0/7] heterogeneous computing library
>
> > > I'd like to introduce (with a dedicated option) the memory API in
> > > testpmd to provide an example of how to TX/RX packets using device
> > > memory.
> >
> > Not sure, without embedding a sideband communication mechanism, how it
> > can notify the GPU and get back to the CPU. If you could share the example
> > API sequence, that would help us understand the level of coupling with
> > testpmd.
> >
> There is no need for a communication mechanism here.
> Assuming there is no workload to process the network packets (to not
> complicate things), the steps are:
> 1) Create a DPDK mempool with device external memory using the hcdev (or
>    gpudev) library
> 2) Use that mempool to tx/rx/fwd packets
>
> As an example, you can look at my l2fwd-nv application here:
> https://github.com/NVIDIA/l2fwd-nv
>

To enhance the 'rte_extmem_register' / 'rte_pktmbuf_pool_create_extbuf' ?

	if (l2fwd_mem_type == MEM_HOST_PINNED) {
		/* Pinned host memory: allocate with rte_malloc and register it with CUDA. */
		ext_mem.buf_ptr = rte_malloc("extmem", ext_mem.buf_len, 0);
		CUDA_CHECK(cudaHostRegister(ext_mem.buf_ptr, ext_mem.buf_len,
					    cudaHostRegisterMapped));
		void *pDevice;
		CUDA_CHECK(cudaHostGetDevicePointer(&pDevice, ext_mem.buf_ptr, 0));
		if (pDevice != ext_mem.buf_ptr)
			rte_exit(EXIT_FAILURE,
				 "GPU pointer does not match CPU pointer\n");
	} else {
		/* Device memory: allocate with cudaMalloc and register it as DPDK external memory. */
		ext_mem.buf_iova = RTE_BAD_IOVA;
		CUDA_CHECK(cudaMalloc(&ext_mem.buf_ptr, ext_mem.buf_len));
		if (ext_mem.buf_ptr == NULL)
			rte_exit(EXIT_FAILURE, "Could not allocate GPU memory\n");

		unsigned int flag = 1;
		CUresult status = cuPointerSetAttribute(&flag,
				CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
				(CUdeviceptr)ext_mem.buf_ptr);
		if (CUDA_SUCCESS != status) {
			rte_exit(EXIT_FAILURE,
				 "Could not set SYNC MEMOP attribute for GPU memory at %llx\n",
				 (CUdeviceptr)ext_mem.buf_ptr);
		}

		ret = rte_extmem_register(ext_mem.buf_ptr, ext_mem.buf_len,
					  NULL, ext_mem.buf_iova, GPU_PAGE_SIZE);
		if (ret)
			rte_exit(EXIT_FAILURE, "Could not register GPU memory\n");
	}

	/* Make the external buffer DMA-addressable by the NIC ... */
	ret = rte_dev_dma_map(rte_eth_devices[l2fwd_port_id].device,
			      ext_mem.buf_ptr, ext_mem.buf_iova, ext_mem.buf_len);
	if (ret)
		rte_exit(EXIT_FAILURE, "Could not DMA map EXT memory\n");

	/* ... and create a pktmbuf pool whose payload buffers live in that memory. */
	mpool_payload = rte_pktmbuf_pool_create_extbuf("payload_mpool",
			l2fwd_nb_mbufs, 0, 0, ext_mem.elt_size,
			rte_socket_id(), &ext_mem, 1);
	if (mpool_payload == NULL)
		rte_exit(EXIT_FAILURE, "Could not create EXT memory mempool\n");
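For step 2, the application then uses mpool_payload like any other mempool. Below is a minimal sketch (not code from l2fwd-nv; fwd_loop, force_quit, BURST_SIZE and the single rx/tx queue are made-up names for illustration) of a forwarding loop on a port whose Rx queues were set up with this mempool:

	#include <stdbool.h>
	#include <stdint.h>
	#include <rte_ethdev.h>
	#include <rte_mbuf.h>

	#define BURST_SIZE 32

	static volatile bool force_quit;	/* set from a signal handler, as in the l2fwd examples */

	/* Forward packets on one port/queue; the payloads sit in the extbuf
	 * mempool (GPU or pinned host memory), the CPU only touches the mbuf
	 * descriptors. A GPU kernel working on the payloads would be launched
	 * between the rx and tx bursts. */
	static void
	fwd_loop(uint16_t port_id)
	{
		struct rte_mbuf *pkts[BURST_SIZE];
		uint16_t nb_rx, nb_tx, i;

		while (!force_quit) {
			nb_rx = rte_eth_rx_burst(port_id, 0, pkts, BURST_SIZE);
			if (nb_rx == 0)
				continue;

			nb_tx = rte_eth_tx_burst(port_id, 0, pkts, nb_rx);

			/* Drop whatever the NIC did not accept. */
			for (i = nb_tx; i < nb_rx; i++)
				rte_pktmbuf_free(pkts[i]);
		}
	}

This assumes rte_eth_rx_queue_setup() was called with mpool_payload for that port, so the mbufs returned by rte_eth_rx_burst() already point at the external buffers.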