[dpdk-dev] [PATCH v5] vfio: change to use generic multi-process channel
Previously, vfio uses its own private channel for the secondary process to get container fd and group fd from the primary process. This patch changes to use the generic mp channel. Test: 1. Bind two NICs to vfio-pci. 2. Start the primary and secondary process. $ (symmetric_mp) -c 2 -- -p 3 --num-procs=2 --proc-id=0 $ (symmetric_mp) -c 4 --proc-type=auto -- -p 3 \ --num-procs=2 --proc-id=1 Cc: anatoly.bura...@intel.com Signed-off-by: Jianfeng Tan --- lib/librte_eal/linuxapp/eal/eal.c | 14 +- lib/librte_eal/linuxapp/eal/eal_vfio.c | 172 +-- lib/librte_eal/linuxapp/eal/eal_vfio.h | 15 +- lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 409 - 4 files changed, 136 insertions(+), 474 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 38306bf..4ca06f4 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -695,18 +695,8 @@ static int rte_eal_vfio_setup(void) return -1; vfio_enabled = rte_vfio_is_enabled("vfio"); - if (vfio_enabled) { - - /* if we are primary process, create a thread to communicate with -* secondary processes. the thread will use a socket to wait for -* requests from secondary process to send open file descriptors, -* because VFIO does not allow multiple open descriptors on a group or -* VFIO container. 
-*/ - if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_mp_sync_setup() < 0) - return -1; - } + if (vfio_enabled && vfio_mp_sync_setup() < 0) + return -1; return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index e44ae4d..d905e8e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #include @@ -42,6 +42,10 @@ vfio_get_group_fd(int iommu_group_no) int vfio_group_fd; char filename[PATH_MAX]; struct vfio_group *cur_grp; + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; /* check if we already have the group descriptor open */ for (i = 0; i < VFIO_MAX_GROUPS; i++) @@ -101,50 +105,31 @@ vfio_get_group_fd(int iommu_group_no) return vfio_group_fd; } /* if we're in a secondary process, request group fd from the primary -* process via our socket +* process via mp channel */ - else { - int socket_fd, ret; - - socket_fd = vfio_mp_sync_connect_to_primary(); - - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) { - RTE_LOG(ERR, EAL, " cannot send group number!\n"); - close(socket_fd); - return -1; - } - ret = vfio_mp_sync_receive_request(socket_fd); - switch (ret) { - case SOCKET_NO_FD: - close(socket_fd); - return 0; - case SOCKET_OK: - vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); - /* if we got the fd, store it and return it */ - if (vfio_group_fd > 0) { - close(socket_fd); - cur_grp->group_no = iommu_group_no; 
- cur_grp->fd = vfio_group_fd; - vfio_cfg.vfio_active_groups++; - return vfio_group_fd; - } - /* fall-through on error */ - default: - RTE_LOG(ERR, EAL, " cannot get container fd!\n"); - close(socket_fd); - return -1; + p->req = SOCKET_REQ_GROUP; + p->group_no = iommu_group_no; + strcpy(mp_req.name, "vfio"); + mp_req.len_param = sizeof(*p); + mp_req.num_fds = 0; + + vfio_group_fd = -1; + if (rte_mp_request(&mp_re
[dpdk-dev] [PATCH] hash: fix missing spinlock unlock in add key
Fix missing spinlock unlock during add key when key is already present. Fixes: be856325cba3 ("hash: add scalable multi-writer insertion with Intel TSX") Cc: sta...@dpdk.org Signed-off-by: Pavan Nikhilesh --- lib/librte_hash/rte_cuckoo_hash.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 9b1387b5e..a07543a29 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -552,7 +552,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * subtracting the first dummy index */ - return prim_bkt->key_idx[i] - 1; + ret = prim_bkt->key_idx[i] - 1; + goto failure; } } } @@ -572,7 +573,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * subtracting the first dummy index */ - return sec_bkt->key_idx[i] - 1; + ret = sec_bkt->key_idx[i] - 1; + goto failure; } } } -- 2.16.2
[dpdk-dev] [PATCH] pdump: change to use generic multi-process channel
The original code relies on the private channel for primary and secondary communication. Change to use the generic multi-process channel. Note that with this change, dpdk-pdump will not be compatible with DPDK applications built against older versions. Cc: reshma.pat...@intel.com Signed-off-by: Jianfeng Tan --- lib/librte_pdump/Makefile| 3 +- lib/librte_pdump/rte_pdump.c | 420 +++ lib/librte_pdump/rte_pdump.h | 1 + 3 files changed, 66 insertions(+), 358 deletions(-) diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile index 98fa752..0ee0fa1 100644 --- a/lib/librte_pdump/Makefile +++ b/lib/librte_pdump/Makefile @@ -1,11 +1,12 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2016 Intel Corporation +# Copyright(c) 2016-2018 Intel Corporation include $(RTE_SDK)/mk/rte.vars.mk # library name LIB = librte_pdump.a +CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 CFLAGS += -D_GNU_SOURCE LDLIBS += -lpthread diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c index ec8a5d8..1dee72f 100644 --- a/lib/librte_pdump/rte_pdump.c +++ b/lib/librte_pdump/rte_pdump.c @@ -1,16 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2016 Intel Corporation + * Copyright(c) 2016-2018 Intel Corporation */ -#include -#include -#include -#include -#include -#include -#include -#include - #include #include #include @@ -20,12 +11,6 @@ #include "rte_pdump.h" -#define SOCKET_PATH_VAR_RUN "/var/run" -#define SOCKET_PATH_HOME "HOME" -#define DPDK_DIR "/.dpdk" -#define SOCKET_DIR "/pdump_sockets" -#define SERVER_SOCKET "%s/pdump_server_socket" -#define CLIENT_SOCKET "%s/pdump_client_socket_%d_%u" #define DEVICE_ID_SIZE 64 /* Macros for printing using RTE_LOG */ #define RTE_LOGTYPE_PDUMP RTE_LOGTYPE_USER1 @@ -39,11 +24,6 @@ enum pdump_version { V1 = 1 }; -static pthread_t pdump_thread; -static int pdump_socket_fd; -static char server_socket_dir[PATH_MAX]; -static char client_socket_dir[PATH_MAX]; - struct pdump_request { uint16_t ver; 
uint16_t op; @@ -307,7 +287,7 @@ pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue, } static int -set_pdump_rxtx_cbs(struct pdump_request *p) +set_pdump_rxtx_cbs(const struct pdump_request *p) { uint16_t nb_rx_q = 0, nb_tx_q = 0, end_q, queue; uint16_t port; @@ -391,313 +371,49 @@ set_pdump_rxtx_cbs(struct pdump_request *p) return ret; } -/* get socket path (/var/run if root, $HOME otherwise) */ static int -pdump_get_socket_path(char *buffer, int bufsz, enum rte_pdump_socktype type) +pdump_server(const struct rte_mp_msg *mp_msg, const void *peer) { - char dpdk_dir[PATH_MAX] = {0}; - char dir[PATH_MAX] = {0}; - char *dir_home = NULL; - int ret = 0; - - if (type == RTE_PDUMP_SOCKET_SERVER && server_socket_dir[0] != 0) - snprintf(dir, sizeof(dir), "%s", server_socket_dir); - else if (type == RTE_PDUMP_SOCKET_CLIENT && client_socket_dir[0] != 0) - snprintf(dir, sizeof(dir), "%s", client_socket_dir); - else { - if (getuid() != 0) { - dir_home = getenv(SOCKET_PATH_HOME); - if (!dir_home) { - RTE_LOG(ERR, PDUMP, - "Failed to get environment variable" - " value for %s, %s:%d\n", - SOCKET_PATH_HOME, __func__, __LINE__); - return -1; - } - snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s", - dir_home, DPDK_DIR); - } else - snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s", - SOCKET_PATH_VAR_RUN, DPDK_DIR); - - mkdir(dpdk_dir, 0700); - snprintf(dir, sizeof(dir), "%s%s", - dpdk_dir, SOCKET_DIR); - } - - ret = mkdir(dir, 0700); - /* if user passed socket path is invalid, return immediately */ - if (ret < 0 && errno != EEXIST) { - RTE_LOG(ERR, PDUMP, - "Failed to create dir:%s:%s\n", dir, - strerror(errno)); - rte_errno = errno; - return -1; - } - - if (type == RTE_PDUMP_SOCKET_SERVER) - snprintf(buffer, bufsz, SERVER_SOCKET, dir); - else - snprintf(buffer, bufsz, CLIENT_SOCKET, dir, getpid(), - rte_sys_gettid()); - - return 0; -} - -static int -pdump_create_server_socket(void) -{ - int ret, socket_fd; - struct sockaddr_un addr; - socklen_t addr_len; - - ret = 
pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path), - RT
[dpdk-dev] [PATCH 0/4] allow procinfo and pdump on eth vdev
As we know, we have the below limitations in vdev: - dpdk-procinfo cannot get the stats of (most) vdev in primary process; - dpdk-pdump cannot dump the packets for (most) vdev in primary process; - secondary process cannot use (most) vdev in primary process. The very first reason is that the secondary process actually does not know the existence of those vdevs as vdevs are chained on a linked list, and not shareable to secondary. In this patch series, we would like to propose a vdev sharing model like this: - As a secondary process boots, all devices (including vdev) in primary will be automatically shared. After both primary and secondary process booted, - Device add/remove in primary will be translated to device hot plug/unplug event in secondary processes. (TODO) - Device add in secondary * If that kind of device supports multi-process, the secondary will request the primary to probe the device and the primary to share it to the secondary. It's not necessary to have secondary-private device in this case. (TODO) * If that kind of device does not support multi-process, the secondary will probe the device by itself, and the port id is shared among all primary/secondary processes. This patch series doesn't: - provide secondary data path (Rx/Tx) support for each specific vdev. How to test: Step 0: start testpmd with a vhost port; and a VM connected to the vhost port. Step 1: try using dpdk-procinfo to get the stats. $(dpdk-procinfo) --log-level=8 --no-pci -- --stats Step 2: try using dpdk-pdump to dump the packets. 
$(dpdk-pdump) -- --pdump 'port=0,queue=*,rx-dev=/tmp/rx.pcap' Jianfeng Tan (4): eal: bring forward multi-process channel init bus/vdev: bus scan by multi-process channel drivers/net: do not allocate rte_eth_dev_data privately drivers/net: share vdev data to secondary process drivers/bus/vdev/Makefile | 1 + drivers/bus/vdev/vdev.c | 110 ++ drivers/net/af_packet/rte_eth_af_packet.c | 42 ++-- drivers/net/bonding/rte_eth_bond_pmd.c| 13 drivers/net/failsafe/failsafe.c | 14 drivers/net/kni/rte_eth_kni.c | 25 --- drivers/net/null/rte_eth_null.c | 30 drivers/net/octeontx/octeontx_ethdev.c| 28 drivers/net/pcap/rte_eth_pcap.c | 31 + drivers/net/softnic/rte_eth_softnic.c | 19 +- drivers/net/tap/rte_eth_tap.c | 22 +++--- drivers/net/vhost/rte_eth_vhost.c | 34 - lib/librte_eal/bsdapp/eal/eal.c | 23 --- lib/librte_eal/linuxapp/eal/eal.c | 23 --- 14 files changed, 295 insertions(+), 120 deletions(-) -- 2.7.4
[dpdk-dev] [PATCH 1/4] eal: bring forward multi-process channel init
Adjust the init sequence: put mp channel init before bus scan so that we can init the vdev bus through mp channel in the secondary process before the bus scan. Signed-off-by: Jianfeng Tan --- lib/librte_eal/bsdapp/eal/eal.c | 23 +-- lib/librte_eal/linuxapp/eal/eal.c | 23 +-- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 4eafcb5..b469382 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -544,6 +544,19 @@ rte_eal_init(int argc, char **argv) return -1; } + rte_config_init(); + + /* Put mp channel init before bus scan so that we can init the vdev +* bus through mp channel in the secondary process before the bus scan. +*/ + if (rte_mp_channel_init() < 0) { + rte_eal_init_alert("failed to init mp channel\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_errno = EFAULT; + return -1; + } + } + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices\n"); rte_errno = ENODEV; @@ -583,16 +596,6 @@ rte_eal_init(int argc, char **argv) rte_srand(rte_rdtsc()); - rte_config_init(); - - if (rte_mp_channel_init() < 0) { - rte_eal_init_alert("failed to init mp channel\n"); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - rte_errno = EFAULT; - return -1; - } - } - if (rte_eal_memory_init() < 0) { rte_eal_init_alert("Cannot init memory\n"); rte_errno = ENOMEM; diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 4ca06f4..8914f91 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -769,6 +769,19 @@ rte_eal_init(int argc, char **argv) return -1; } + rte_config_init(); + + /* Put mp channel init before bus scan so that we can init the vdev +* bus through mp channel in the secondary process before the bus scan. 
+*/ + if (rte_mp_channel_init() < 0) { + rte_eal_init_alert("failed to init mp channel\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_errno = EFAULT; + return -1; + } + } + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices\n"); rte_errno = ENODEV; @@ -815,8 +828,6 @@ rte_eal_init(int argc, char **argv) rte_srand(rte_rdtsc()); - rte_config_init(); - if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) { rte_eal_init_alert("Cannot init logging."); rte_errno = ENOMEM; @@ -824,14 +835,6 @@ rte_eal_init(int argc, char **argv) return -1; } - if (rte_mp_channel_init() < 0) { - rte_eal_init_alert("failed to init mp channel\n"); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - rte_errno = EFAULT; - return -1; - } - } - #ifdef VFIO_PRESENT if (rte_eal_vfio_setup() < 0) { rte_eal_init_alert("Cannot init VFIO\n"); -- 2.7.4
[dpdk-dev] [PATCH 4/4] drivers/net: share vdev data to secondary process
dpdk-procinfo, as a secondary process, cannot fetch stats for vdev. This patch enables that by attaching the port from the shared data. We also fill the eth dev ops, with only some ops works in secondary process, for example, stats_get(). Note that, we still cannot Rx/Tx packets on the ports which do not support multi-process. Reported-by: Signed-off-by: Vipin Varghese Signed-off-by: Jianfeng Tan --- drivers/net/af_packet/rte_eth_af_packet.c | 17 +++-- drivers/net/bonding/rte_eth_bond_pmd.c| 13 + drivers/net/failsafe/failsafe.c | 14 ++ drivers/net/kni/rte_eth_kni.c | 12 drivers/net/null/rte_eth_null.c | 13 + drivers/net/octeontx/octeontx_ethdev.c| 14 ++ drivers/net/pcap/rte_eth_pcap.c | 13 + drivers/net/softnic/rte_eth_softnic.c | 19 --- drivers/net/tap/rte_eth_tap.c | 13 + drivers/net/vhost/rte_eth_vhost.c | 17 +++-- 10 files changed, 138 insertions(+), 7 deletions(-) diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c index 2db692f..970cf05 100644 --- a/drivers/net/af_packet/rte_eth_af_packet.c +++ b/drivers/net/af_packet/rte_eth_af_packet.c @@ -915,9 +915,22 @@ rte_pmd_af_packet_probe(struct rte_vdev_device *dev) int ret = 0; struct rte_kvargs *kvlist; int sockfd = -1; + struct rte_eth_dev *eth_dev; + const char *name = rte_vdev_device_name(dev); + + RTE_LOG(INFO, PMD, "Initializing pmd_af_packet for %s\n", name); - RTE_LOG(INFO, PMD, "Initializing pmd_af_packet for %s\n", - rte_vdev_device_name(dev)); + if (rte_eal_process_type() == RTE_PROC_SECONDARY && + strlen(rte_vdev_device_args(dev)) == 0) { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + RTE_LOG(ERR, PMD, "Failed to probe %s\n", name); + return -1; + } + /* TODO: request info from primary to set up Rx and Tx */ + eth_dev->dev_ops = &ops; + return 0; + } kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); if (kvlist == NULL) { diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c 
index c34c325..7d6dea2 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -2994,6 +2994,7 @@ bond_probe(struct rte_vdev_device *dev) uint8_t bonding_mode, socket_id/*, agg_mode*/; int arg_count, port_id; uint8_t agg_mode; + struct rte_eth_dev *eth_dev; if (!dev) return -EINVAL; @@ -3001,6 +3002,18 @@ bond_probe(struct rte_vdev_device *dev) name = rte_vdev_device_name(dev); RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name); + if (rte_eal_process_type() == RTE_PROC_SECONDARY && + strlen(rte_vdev_device_args(dev)) == 0) { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + RTE_LOG(ERR, PMD, "Failed to probe %s\n", name); + return -1; + } + /* TODO: request info from primary to set up Rx and Tx */ + eth_dev->dev_ops = &default_dev_ops; + return 0; + } + kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), pmd_bond_init_valid_arguments); if (kvlist == NULL) diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c index c499bfb..ea9fdc6 100644 --- a/drivers/net/failsafe/failsafe.c +++ b/drivers/net/failsafe/failsafe.c @@ -294,10 +294,24 @@ static int rte_pmd_failsafe_probe(struct rte_vdev_device *vdev) { const char *name; + struct rte_eth_dev *eth_dev; name = rte_vdev_device_name(vdev); INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s", name); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY && + strlen(rte_vdev_device_args(vdev)) == 0) { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + RTE_LOG(ERR, PMD, "Failed to probe %s\n", name); + return -1; + } + /* TODO: request info from primary to set up Rx and Tx */ + eth_dev->dev_ops = &failsafe_ops; + return 0; + } + return fs_eth_dev_create(vdev); } diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c index 1a07089..24909c7 100644 --- a/drivers/net/kni/rte_eth_kni.c +++ b/drivers/net/kni/rte_eth_kni.c @@ -405,6 +405,18 @@ eth_kni_probe(struct rte_vdev_device *vdev) params = 
rte_vdev_device_args(vdev); RTE_LOG(INFO, PMD, "Initializing eth_kni f
[dpdk-dev] [PATCH 2/4] bus/vdev: bus scan by multi-process channel
To scan the vdevs in primary, we send request to primary process to obtain the names for vdevs. Only the name is shared from the primary. In probe(), the device driver is supposed to locate (or request more) the detail information from the primary. Signed-off-by: Jianfeng Tan --- drivers/bus/vdev/Makefile | 1 + drivers/bus/vdev/vdev.c | 110 ++ 2 files changed, 111 insertions(+) diff --git a/drivers/bus/vdev/Makefile b/drivers/bus/vdev/Makefile index 24d424a..bd0bb89 100644 --- a/drivers/bus/vdev/Makefile +++ b/drivers/bus/vdev/Makefile @@ -10,6 +10,7 @@ LIB = librte_bus_vdev.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API # versioning export map EXPORT_MAP := rte_bus_vdev_version.map diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c index e4bc724..0a3ea52 100644 --- a/drivers/bus/vdev/vdev.c +++ b/drivers/bus/vdev/vdev.c @@ -314,6 +314,88 @@ rte_vdev_uninit(const char *name) return 0; } +struct vdev_param { +#define VDEV_SCAN_REQ 1 +#define VDEV_SCAN_ONE 2 +#define VDEV_SCAN_REP 3 + int type; + int num; + char name[RTE_DEV_NAME_MAX_LEN]; +}; + +static int vdev_plug(struct rte_device *dev); + +static int +vdev_action(const struct rte_mp_msg *mp_msg, const void *peer) +{ + struct rte_vdev_device *dev; + struct rte_devargs *devargs; + struct rte_mp_msg mp_resp; + struct vdev_param *ou = (struct vdev_param *)&mp_resp.param; + const struct vdev_param *in = (const struct vdev_param *)mp_msg->param; + const char *devname; + int num; + + strcpy(mp_resp.name, "vdev"); + mp_resp.len_param = sizeof(*ou); + mp_resp.num_fds = 0; + + switch (in->type) { + case VDEV_SCAN_REQ: + ou->type = VDEV_SCAN_ONE; + ou->num = 1; + num = 0; + TAILQ_FOREACH(dev, &vdev_device_list, next) { + devname = rte_vdev_device_name(dev); + if (strlen(devname) == 0) + VDEV_LOG(INFO, "vdev with no name is not sent"); + VDEV_LOG(INFO, "send vdev, %s", devname); + strncpy(ou->name, devname, RTE_DEV_NAME_MAX_LEN); + if (rte_mp_sendmsg(&mp_resp) < 0) + VDEV_LOG(ERR, 
"send vdev, %s, failed, %s", +devname, strerror(rte_errno)); + num++; + } + ou->type = VDEV_SCAN_REP; + ou->num = num; + if (rte_mp_reply(&mp_resp, peer) < 0) + VDEV_LOG(ERR, "Failed to reply a scan request"); + break; + case VDEV_SCAN_ONE: + VDEV_LOG(INFO, "receive vdev, %s", in->name); + dev = find_vdev(in->name); + if (dev) { + VDEV_LOG(ERR, "vdev already exists: %s", in->name); + break; + } + + devargs = alloc_devargs(in->name, NULL); + if (!devargs) { + VDEV_LOG(ERR, "failed to allocate memory"); + break; + } + + dev = calloc(1, sizeof(*dev)); + if (!dev) { + VDEV_LOG(ERR, "failed to allocate memory"); + free(devargs); + break; + } + + dev->device.devargs = devargs; + dev->device.numa_node = 0; /* to be corrected in probe() */ + dev->device.name = devargs->name; + + TAILQ_INSERT_TAIL(&devargs_list, devargs, next); + TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); + break; + default: + VDEV_LOG(ERR, "vdev cannot recognize this message"); + } + + return 0; +} + static int vdev_scan(void) { @@ -321,6 +403,34 @@ vdev_scan(void) struct rte_devargs *devargs; struct vdev_custom_scan *custom_scan; + if (rte_mp_action_register("vdev", vdev_action) < 0 && + rte_errno != EEXIST) { + VDEV_LOG(ERR, "vdev fails to add action"); + return -1; + } + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vdev_param *req = (struct vdev_param *)mp_req.param; + struct vdev_param *resp; + + strcpy(mp_req.name, "vdev"); + mp_req.len_param = sizeof(*req); + mp_req.num_fds = 0; + req->type = VDEV_SCAN_REQ; + if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 && + mp_reply.nb_received == 1) { + mp_rep = &mp_reply.msgs[0]; +
[dpdk-dev] [PATCH 3/4] drivers/net: do not allocate rte_eth_dev_data privately
We introduced private rte_eth_dev_data to allow vdev to be created both in primary process and secondary process(es). This is not friendly to multi-process model, for example, it leads to port id contention issue if two processes both find the data entry is free. And to get stats of primary vdev in secondary, we must allocate from the pre-defined array so that we can find it. Suggested-by: Bruce Richardson Signed-off-by: Jianfeng Tan --- drivers/net/af_packet/rte_eth_af_packet.c | 25 +++-- drivers/net/kni/rte_eth_kni.c | 13 ++--- drivers/net/null/rte_eth_null.c | 17 +++-- drivers/net/octeontx/octeontx_ethdev.c| 14 ++ drivers/net/pcap/rte_eth_pcap.c | 18 +++--- drivers/net/tap/rte_eth_tap.c | 9 + drivers/net/vhost/rte_eth_vhost.c | 17 ++--- 7 files changed, 20 insertions(+), 93 deletions(-) diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c index 57eccfd..2db692f 100644 --- a/drivers/net/af_packet/rte_eth_af_packet.c +++ b/drivers/net/af_packet/rte_eth_af_packet.c @@ -564,25 +564,17 @@ rte_pmd_init_internals(struct rte_vdev_device *dev, RTE_LOG(ERR, PMD, "%s: no interface specified for AF_PACKET ethdev\n", name); - goto error_early; + return -1; } RTE_LOG(INFO, PMD, "%s: creating AF_PACKET-backed ethdev on numa socket %u\n", name, numa_node); - /* -* now do all data allocation - for eth_dev structure, dummy pci driver -* and internal (private) data -*/ - data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node); - if (data == NULL) - goto error_early; - *internals = rte_zmalloc_socket(name, sizeof(**internals), 0, numa_node); if (*internals == NULL) - goto error_early; + return -1; for (q = 0; q < nb_queues; q++) { (*internals)->rx_queue[q].map = MAP_FAILED; @@ -604,24 +596,24 @@ rte_pmd_init_internals(struct rte_vdev_device *dev, RTE_LOG(ERR, PMD, "%s: I/F name too long (%s)\n", name, pair->value); - goto error_early; + return -1; } if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) { RTE_LOG(ERR, PMD, "%s: ioctl failed 
(SIOCGIFINDEX)\n", name); - goto error_early; + return -1; } (*internals)->if_name = strdup(pair->value); if ((*internals)->if_name == NULL) - goto error_early; + return -1; (*internals)->if_index = ifr.ifr_ifindex; if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) { RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCGIFHWADDR)\n", name); - goto error_early; + return -1; } memcpy(&(*internals)->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN); @@ -775,14 +767,13 @@ rte_pmd_init_internals(struct rte_vdev_device *dev, (*internals)->nb_queues = nb_queues; - rte_memcpy(data, (*eth_dev)->data, sizeof(*data)); + data = (*eth_dev)->data; data->dev_private = *internals; data->nb_rx_queues = (uint16_t)nb_queues; data->nb_tx_queues = (uint16_t)nb_queues; data->dev_link = pmd_link; data->mac_addrs = &(*internals)->eth_addr; - (*eth_dev)->data = data; (*eth_dev)->dev_ops = &ops; return 0; @@ -802,8 +793,6 @@ rte_pmd_init_internals(struct rte_vdev_device *dev, } free((*internals)->if_name); rte_free(*internals); -error_early: - rte_free(data); return -1; } diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c index dc4e65f..1a07089 100644 --- a/drivers/net/kni/rte_eth_kni.c +++ b/drivers/net/kni/rte_eth_kni.c @@ -337,25 +337,17 @@ eth_kni_create(struct rte_vdev_device *vdev, struct pmd_internals *internals; struct rte_eth_dev_data *data; struct rte_eth_dev *eth_dev; - const char *name; RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n", numa_node); - name = rte_vdev_device_name(vdev); - data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node); - if (data == NULL) - return NULL; - /* reserve an ethdev entry */ eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*internals)); - if (eth_dev == NULL) { - rte_free(data); + if (eth_dev == NULL) return NULL; - } internals = eth_dev->data->dev_private; - rte_memcpy(data, eth_d
Re: [dpdk-dev] [PATCH] usertools/dpdk-devbind.py: add support for avp device
Hi Yigit, Should I create an updated one and send it out? BR. Xiaohua Zhang -Original Message- From: Ferruh Yigit [mailto:ferruh.yi...@intel.com] Sent: Friday, March 02, 2018 11:35 PM To: Zhang, Xiaohua; dev@dpdk.org Subject: Re: [dpdk-dev] [PATCH] usertools/dpdk-devbind.py: add support for avp device On 2/26/2018 5:43 AM, Xiaohua Zhang wrote: > Signed-off-by: Xiaohua Zhang Patch title should be: usertools: add support for AVP device Apart from that, Acked-by: Ferruh Yigit
Re: [dpdk-dev] 16.11.5 (LTS) patches review and test
Hi Luca, In powerpc to support i40e, we wish below patch be merged: c3def6a8724 net/i40e: implement vector PMD for altivec I have verified br-16.11 with the above commit (in cherry-pick, I needed to remove release notes which was meant for 17.05 release which hope is fine here). Could you please merge the above. Thanks, Gowrishankar On Monday 26 February 2018 05:04 PM, Luca Boccassi wrote: Hi all, Here is a list of patches targeted for LTS release 16.11.5. Please help review and test. The planned date for the final release is March the 5th, pending results from regression tests. Before that, please shout if anyone has objections with these patches being applied. These patches are located at branch 16.11 of dpdk-stable repo: http://dpdk.org/browse/dpdk-stable/ Thanks. Luca Boccassi --- Ajit Khaparde (6): net/bnxt: support new PCI IDs net/bnxt: parse checksum offload flags net/bnxt: fix group info usage net/bnxt: fix broadcast cofiguration net/bnxt: fix size of Tx ring in HW net/bnxt: fix link speed setting with autoneg off Akhil Goyal (1): examples/ipsec-secgw: fix corner case for SPI value Alejandro Lucero (3): net/nfp: fix MTU settings net/nfp: fix jumbo settings net/nfp: fix CRC strip check behaviour Anatoly Burakov (14): memzone: fix leak on allocation error malloc: protect stats with lock malloc: fix end for bounded elements vfio: fix enabled check on error app/procinfo: add compilation option in config test: register test as failed if setup failed test/table: fix uninitialized parameter test/memzone: fix wrong test test/memzone: handle previously allocated memzones usertools/devbind: remove unused function test/reorder: fix memory leak test/ring_perf: fix memory leak test/table: fix memory leak test/timer_perf: fix memory leak Andriy Berestovskyy (1): keepalive: fix state alignment Bao-Long Tran (1): examples/ip_pipeline: fix timer period unit Beilei Xing (8): net/i40e: fix flow director Rx resource defect net/i40e: add warnings when writing global registers 
net/i40e: add debug logs when writing global registers net/i40e: fix multiple driver support issue net/i40e: fix interrupt conflict when using multi-driver net/i40e: fix Rx interrupt net/i40e: check multi-driver option parsing app/testpmd: fix flow director filter Chas Williams (1): net/bonding: fix setting slave MAC addresses David Harton (1): net/i40e: fix VF reset stats crash Didier Pallard (1): net/virtio: fix incorrect cast Dustin Lundquist (1): examples/exception_path: align stats on cache line Erez Ferber (1): net/mlx5: fix MTU update Ferruh Yigit (1): kni: fix build with kernel 4.15 Fiona Trahe (1): crypto/qat: fix null auth algo overwrite Gowrishankar Muthukrishnan (2): eal/ppc: remove the braces in memory barrier macros eal/ppc: support sPAPR IOMMU for vfio-pci Harish Patil (2): net/qede: fix to reject config with no Rx queue net/qede/base: fix VF LRO tunnel configuration Hemant Agrawal (4): pmdinfogen: fix cross compilation for ARM big endian lpm: fix ARM big endian build net/i40e: fix ARM big endian build net/ixgbe: fix ARM big endian build Hyong Youb Kim (1): net/enic: fix crash due to static max number of queues Igor Ryzhov (1): net/i40e: fix flag for MAC address write Ilya V. 
Matveychikov (2): eal: update assertion macro mbuf: cleanup function to get last segment Jerin Jacob (3): net/thunderx: fix multi segment Tx function return test/crypto: fix missing include ethdev: fix data alignment Jerry Lilijun (1): net/bonding: fix activated slave in 8023ad mode Jianfeng Tan (3): vhost: fix crash net/vhost: fix log messages on create/destroy net/virtio-user: fix start with kernel vhost Junjie Chen (3): vhost: fix dequeue zero copy with virtio1 examples/vhost: fix sending ARP packet to self vhost: fix mbuf free Kefu Chai (1): contigmem: fix build on FreeBSD 12 Konstantin Ananyev (1): eal/x86: use lock-prefixed instructions for SMP barrier Liang-Min Larry Wang (1): net/ixgbe: improve link state check on VF Marko Kovacevic (2): mk: support renamed Makefile in external project mk: fix external build Markus Theil (2): igb_uio: fix IRQ disable on recent kernels igb_uio: fix MSI-X IRQ assignment with new IRQ function Matan Azrad (2): app/testpmd: fix port index in RSS forward config app/testpmd: fix port topology in RSS forward config Matej Vido (1): net/szedata2: fix check of mmap return value Maxime Coquelin (1): net/virtio: fix resuming port with Rx vector path Michael McConville (1):
Re: [dpdk-dev] [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h
> -Original Message- > From: Thomas Monjalon [mailto:tho...@monjalon.net] > Sent: Thursday, March 1, 2018 10:14 PM > To: Tan, Jianfeng > Cc: Maxime Coquelin ; Yang, Zhiyong > ; dev@dpdk.org; y...@fridaylinux.org; Bie, Tiwei > ; Wang, Zhihong ; Wang, > Dong1 > Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to > fd_man.h > > 01/03/2018 07:02, Tan, Jianfeng: > > From: Maxime Coquelin [mailto:maxime.coque...@redhat.com] > > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote: > > > > From: Maxime Coquelin [mailto:maxime.coque...@redhat.com] > > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote: > > > >>>lib/librte_vhost/Makefile | 3 +- > > > >>>lib/librte_vhost/fd_man.c | 274 > > > >>> --- > --- > > > >>>lib/librte_vhost/fd_man.h | 258 > > > >> +-- > > > >>>3 files changed, 253 insertions(+), 282 deletions(-) > > > >>>delete mode 100644 lib/librte_vhost/fd_man.c > > > >> > > > >> I disagree with the patch. > > > >> It is a good thing to reuse the code, but to do it, you need to > > > >> extend the vhost lib API. > > > >> > > > >> New API need to be prefixed with rte_vhost_, and be declared in > > > >> rte_vhost.h. > > > >> > > > >> And no need to move the functions from the .c to the .h file, as > > > >> it > > > moreover > > > >> makes you inline them, which is not necessary here. > > > > > > > > Thanks for your reviewing the series firstly, Maxime. :) > > > > > > > > I considered to do it as you said. However I still preferred this one > > > > at last. > > > > Here are my reasons. > > > > 1) As far as I know, this set of functions are used privately in > > > > librte_vhost > > > before this feature. > > > > No strong request from the perspective of DPDK application. If I > > > understand well, It is enough to expose the functions to all PMDs > > > > And it is better to keep internal use in DPDK. > > > > > > But what the patch is doing is adding fd_man.h to the API, without > > > doing it properly. 
fd_man.h will be installed with other header > > > files, and any external application can use it. > > > > > > > > > > > 2) These functions help to implement vhost user, but they are not > > > > strongly > > > related to other APIs of vhost user which have already exposed. > > > > if we want to expose them as APIs at lib layer, many functions and > > > > related > > > data structure has to be exposed in rte_vhost.h. it looks messy. > > > > Your opinion? > > > > > > Yes, it is not really vhost-related, it could be part of a more > > > generic library. It is maybe better to duplicate these lines, or to > > > move this code in a existing or new library. > > > > I vote to move it to generic library, maybe eal. Poll() has better > compatibility even though poll() is not as performant as epoll(). > > > > Thomas, how do you think? > > I don't see why it should be exported outside of DPDK, except for PMDs. > I would tend to keep it internal but I understand that it would mean > duplicating some code, which is not ideal. > Please could you show what would be the content of the .h in EAL? > If needed to expose them in eal.h, I think that they should be the whole fdset mechanism as followings. typedef void (*fd_cb)(int fd, void *dat, int *remove); struct fdentry { int fd; /* -1 indicates this entry is empty */ fd_cb rcb; /* callback when this fd is readable. */ fd_cb wcb; /* callback when this fd is writeable.*/ void *dat; /* fd context */ int busy; /* whether this entry is being used in cb. */ }; struct fdset { struct pollfd rwfds[MAX_FDS]; struct fdentry fd[MAX_FDS]; pthread_mutex_t fd_mutex; int num;/* current fd number of this fdset */ }; void fdset_init(struct fdset *pfdset);(not used in the patchset) int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat); (used in this patchset) void *fdset_del(struct fdset *pfdset, int fd); (not used in the patchset) void *fdset_event_dispatch(void *arg); (used in this patchset) seems that we have 4 options. 
1) expose them in librte_vhost. 2) expose them in another existing or new library, for example, eal. 3) duplicate the code at the PMD layer. 4) do it as the patch does. Thanks, Zhiyong