> -----Original Message-----
> From: Elena Agostini <eagost...@nvidia.com>
> Sent: Tuesday, September 7, 2021 00:11
> To: Jerin Jacob <jerinjac...@gmail.com>
> Cc: Wang, Haiyue <haiyue.w...@intel.com>; NBU-Contact-Thomas Monjalon 
> <tho...@monjalon.net>; Jerin
> Jacob <jer...@marvell.com>; dpdk-dev <dev@dpdk.org>; Stephen Hemminger 
> <step...@networkplumber.org>;
> David Marchand <david.march...@redhat.com>; Andrew Rybchenko 
> <andrew.rybche...@oktetlabs.ru>; Honnappa
> Nagarahalli <honnappa.nagaraha...@arm.com>; Yigit, Ferruh 
> <ferruh.yi...@intel.com>; techbo...@dpdk.org
> Subject: RE: [dpdk-dev] [RFC PATCH v2 0/7] heterogeneous computing library
> 
> 
> 


> > >
> > > I'd like to introduce (with a dedicated option) the memory API in
> > > testpmd to provide an example of how to TX/RX packets using device
> > > memory.
> >
> > Without embedding a sideband communication mechanism, I am not sure how
> > it can notify the GPU and get back to the CPU. If you could share the
> > example API sequence, that would help us understand the level of
> > coupling with testpmd.
> >
> 
> There is no need for a communication mechanism here.
> Assuming there is no workload processing the network packets (to keep
> things simple), the steps are:
> 1) Create a DPDK mempool backed by device external memory using the
> hcdev (or gpudev) library
> 2) Use that mempool to TX/RX/forward packets
> 
> As an example, you can look at my l2fwd-nv application here: 
> https://github.com/NVIDIA/l2fwd-nv
> 

So the idea is to enhance the 'rte_extmem_register' / 'rte_pktmbuf_pool_create_extbuf' flow, as in the snippet below?

        if (l2fwd_mem_type == MEM_HOST_PINNED) {
                /* Allocate host memory and pin it so the GPU can access it directly. */
                ext_mem.buf_ptr = rte_malloc("extmem", ext_mem.buf_len, 0);
                CUDA_CHECK(cudaHostRegister(ext_mem.buf_ptr, ext_mem.buf_len,
                                cudaHostRegisterMapped));
                void *pDevice;
                CUDA_CHECK(cudaHostGetDevicePointer(&pDevice, ext_mem.buf_ptr, 0));
                if (pDevice != ext_mem.buf_ptr)
                        rte_exit(EXIT_FAILURE,
                                "GPU pointer does not match CPU pointer\n");
        } else {
                /* Allocate device memory and enable synchronous memory operations on it. */
                ext_mem.buf_iova = RTE_BAD_IOVA;
                CUDA_CHECK(cudaMalloc(&ext_mem.buf_ptr, ext_mem.buf_len));
                if (ext_mem.buf_ptr == NULL)
                        rte_exit(EXIT_FAILURE, "Could not allocate GPU memory\n");

                unsigned int flag = 1;
                CUresult status = cuPointerSetAttribute(&flag,
                                CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
                                (CUdeviceptr)ext_mem.buf_ptr);
                if (CUDA_SUCCESS != status) {
                        rte_exit(EXIT_FAILURE,
                                "Could not set SYNC MEMOP attribute for GPU memory at %llx\n",
                                (CUdeviceptr)ext_mem.buf_ptr);
                }

                /* Register the device memory with DPDK as external memory. */
                ret = rte_extmem_register(ext_mem.buf_ptr, ext_mem.buf_len,
                                NULL, ext_mem.buf_iova, GPU_PAGE_SIZE);
                if (ret)
                        rte_exit(EXIT_FAILURE, "Could not register GPU memory\n");
        }

        /* DMA map the external memory for the port and build the mbuf pool on top of it. */
        ret = rte_dev_dma_map(rte_eth_devices[l2fwd_port_id].device,
                        ext_mem.buf_ptr, ext_mem.buf_iova, ext_mem.buf_len);
        if (ret)
                rte_exit(EXIT_FAILURE, "Could not DMA map EXT memory\n");

        mpool_payload = rte_pktmbuf_pool_create_extbuf("payload_mpool",
                        l2fwd_nb_mbufs, 0, 0, ext_mem.elt_size,
                        rte_socket_id(), &ext_mem, 1);
        if (mpool_payload == NULL)
                rte_exit(EXIT_FAILURE, "Could not create EXT memory mempool\n");


