[dpdk-dev] [PATCH v5] vfio: change to use generic multi-process channel

2018-03-04 Thread Jianfeng Tan
Previously, vfio used its own private channel for the secondary
process to get the container fd and group fd from the primary process.

This patch changes to use the generic mp channel.

Test:
  1. Bind two NICs to vfio-pci.

  2. Start the primary and secondary process.
$ (symmetric_mp) -c 2 -- -p 3 --num-procs=2 --proc-id=0
$ (symmetric_mp) -c 4 --proc-type=auto -- -p 3 \
--num-procs=2 --proc-id=1

Cc: anatoly.bura...@intel.com

Signed-off-by: Jianfeng Tan 
---
 lib/librte_eal/linuxapp/eal/eal.c  |  14 +-
 lib/librte_eal/linuxapp/eal/eal_vfio.c | 172 +--
 lib/librte_eal/linuxapp/eal/eal_vfio.h |  15 +-
 lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 409 -
 4 files changed, 136 insertions(+), 474 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
b/lib/librte_eal/linuxapp/eal/eal.c
index 38306bf..4ca06f4 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -695,18 +695,8 @@ static int rte_eal_vfio_setup(void)
return -1;
vfio_enabled = rte_vfio_is_enabled("vfio");
 
-   if (vfio_enabled) {
-
-   /* if we are primary process, create a thread to communicate 
with
-* secondary processes. the thread will use a socket to wait for
-* requests from secondary process to send open file 
descriptors,
-* because VFIO does not allow multiple open descriptors on a 
group or
-* VFIO container.
-*/
-   if (internal_config.process_type == RTE_PROC_PRIMARY &&
-   vfio_mp_sync_setup() < 0)
-   return -1;
-   }
+   if (vfio_enabled && vfio_mp_sync_setup() < 0)
+   return -1;
 
return 0;
 }
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c 
b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index e44ae4d..d905e8e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #include 
@@ -42,6 +42,10 @@ vfio_get_group_fd(int iommu_group_no)
int vfio_group_fd;
char filename[PATH_MAX];
struct vfio_group *cur_grp;
+   struct rte_mp_msg mp_req, *mp_rep;
+   struct rte_mp_reply mp_reply;
+   struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+   struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
/* check if we already have the group descriptor open */
for (i = 0; i < VFIO_MAX_GROUPS; i++)
@@ -101,50 +105,31 @@ vfio_get_group_fd(int iommu_group_no)
return vfio_group_fd;
}
/* if we're in a secondary process, request group fd from the primary
-* process via our socket
+* process via mp channel
 */
-   else {
-   int socket_fd, ret;
-
-   socket_fd = vfio_mp_sync_connect_to_primary();
-
-   if (socket_fd < 0) {
-   RTE_LOG(ERR, EAL, "  cannot connect to primary 
process!\n");
-   return -1;
-   }
-   if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) 
{
-   RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-   close(socket_fd);
-   return -1;
-   }
-   if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
-   RTE_LOG(ERR, EAL, "  cannot send group number!\n");
-   close(socket_fd);
-   return -1;
-   }
-   ret = vfio_mp_sync_receive_request(socket_fd);
-   switch (ret) {
-   case SOCKET_NO_FD:
-   close(socket_fd);
-   return 0;
-   case SOCKET_OK:
-   vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
-   /* if we got the fd, store it and return it */
-   if (vfio_group_fd > 0) {
-   close(socket_fd);
-   cur_grp->group_no = iommu_group_no;
-   cur_grp->fd = vfio_group_fd;
-   vfio_cfg.vfio_active_groups++;
-   return vfio_group_fd;
-   }
-   /* fall-through on error */
-   default:
-   RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
-   close(socket_fd);
-   return -1;
+   p->req = SOCKET_REQ_GROUP;
+   p->group_no = iommu_group_no;
+   strcpy(mp_req.name, "vfio");
+   mp_req.len_param = sizeof(*p);
+   mp_req.num_fds = 0;
+
+   vfio_group_fd = -1;
+   if (rte_mp_request(&mp_re

[dpdk-dev] [PATCH] hash: fix missing spinlock unlock in add key

2018-03-04 Thread Pavan Nikhilesh
Fix missing spinlock unlock during add key when key is already present.

Fixes: be856325cba3 ("hash: add scalable multi-writer insertion with Intel TSX")
Cc: sta...@dpdk.org

Signed-off-by: Pavan Nikhilesh 
---
 lib/librte_hash/rte_cuckoo_hash.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/librte_hash/rte_cuckoo_hash.c 
b/lib/librte_hash/rte_cuckoo_hash.c
index 9b1387b5e..a07543a29 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -552,7 +552,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, 
const void *key,
 * Return index where key is stored,
 * subtracting the first dummy index
 */
-   return prim_bkt->key_idx[i] - 1;
+   ret = prim_bkt->key_idx[i] - 1;
+   goto failure;
}
}
}
@@ -572,7 +573,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, 
const void *key,
 * Return index where key is stored,
 * subtracting the first dummy index
 */
-   return sec_bkt->key_idx[i] - 1;
+   ret = sec_bkt->key_idx[i] - 1;
+   goto failure;
}
}
}
-- 
2.16.2



[dpdk-dev] [PATCH] pdump: change to use generic multi-process channel

2018-03-04 Thread Jianfeng Tan
The original code relies on a private channel for primary and
secondary communication. Change it to use the generic multi-process
channel.

Note that with this change, dpdk-pdump will not be compatible with
older versions of DPDK applications.

Cc: reshma.pat...@intel.com

Signed-off-by: Jianfeng Tan 
---
 lib/librte_pdump/Makefile|   3 +-
 lib/librte_pdump/rte_pdump.c | 420 +++
 lib/librte_pdump/rte_pdump.h |   1 +
 3 files changed, 66 insertions(+), 358 deletions(-)

diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile
index 98fa752..0ee0fa1 100644
--- a/lib/librte_pdump/Makefile
+++ b/lib/librte_pdump/Makefile
@@ -1,11 +1,12 @@
 # SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2016 Intel Corporation
+# Copyright(c) 2016-2018 Intel Corporation
 
 include $(RTE_SDK)/mk/rte.vars.mk
 
 # library name
 LIB = librte_pdump.a
 
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
 CFLAGS += -D_GNU_SOURCE
 LDLIBS += -lpthread
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index ec8a5d8..1dee72f 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -1,16 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2016 Intel Corporation
+ * Copyright(c) 2016-2018 Intel Corporation
  */
 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
 #include 
 #include 
 #include 
@@ -20,12 +11,6 @@
 
 #include "rte_pdump.h"
 
-#define SOCKET_PATH_VAR_RUN "/var/run"
-#define SOCKET_PATH_HOME "HOME"
-#define DPDK_DIR "/.dpdk"
-#define SOCKET_DIR   "/pdump_sockets"
-#define SERVER_SOCKET "%s/pdump_server_socket"
-#define CLIENT_SOCKET "%s/pdump_client_socket_%d_%u"
 #define DEVICE_ID_SIZE 64
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_PDUMP RTE_LOGTYPE_USER1
@@ -39,11 +24,6 @@ enum pdump_version {
V1 = 1
 };
 
-static pthread_t pdump_thread;
-static int pdump_socket_fd;
-static char server_socket_dir[PATH_MAX];
-static char client_socket_dir[PATH_MAX];
-
 struct pdump_request {
uint16_t ver;
uint16_t op;
@@ -307,7 +287,7 @@ pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, 
uint16_t queue,
 }
 
 static int
-set_pdump_rxtx_cbs(struct pdump_request *p)
+set_pdump_rxtx_cbs(const struct pdump_request *p)
 {
uint16_t nb_rx_q = 0, nb_tx_q = 0, end_q, queue;
uint16_t port;
@@ -391,313 +371,49 @@ set_pdump_rxtx_cbs(struct pdump_request *p)
return ret;
 }
 
-/* get socket path (/var/run if root, $HOME otherwise) */
 static int
-pdump_get_socket_path(char *buffer, int bufsz, enum rte_pdump_socktype type)
+pdump_server(const struct rte_mp_msg *mp_msg, const void *peer)
 {
-   char dpdk_dir[PATH_MAX] = {0};
-   char dir[PATH_MAX] = {0};
-   char *dir_home = NULL;
-   int ret = 0;
-
-   if (type == RTE_PDUMP_SOCKET_SERVER && server_socket_dir[0] != 0)
-   snprintf(dir, sizeof(dir), "%s", server_socket_dir);
-   else if (type == RTE_PDUMP_SOCKET_CLIENT && client_socket_dir[0] != 0)
-   snprintf(dir, sizeof(dir), "%s", client_socket_dir);
-   else {
-   if (getuid() != 0) {
-   dir_home = getenv(SOCKET_PATH_HOME);
-   if (!dir_home) {
-   RTE_LOG(ERR, PDUMP,
-   "Failed to get environment variable"
-   " value for %s, %s:%d\n",
-   SOCKET_PATH_HOME, __func__, __LINE__);
-   return -1;
-   }
-   snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s",
-   dir_home, DPDK_DIR);
-   } else
-   snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s",
-   SOCKET_PATH_VAR_RUN, DPDK_DIR);
-
-   mkdir(dpdk_dir, 0700);
-   snprintf(dir, sizeof(dir), "%s%s",
-   dpdk_dir, SOCKET_DIR);
-   }
-
-   ret =  mkdir(dir, 0700);
-   /* if user passed socket path is invalid, return immediately */
-   if (ret < 0 && errno != EEXIST) {
-   RTE_LOG(ERR, PDUMP,
-   "Failed to create dir:%s:%s\n", dir,
-   strerror(errno));
-   rte_errno = errno;
-   return -1;
-   }
-
-   if (type == RTE_PDUMP_SOCKET_SERVER)
-   snprintf(buffer, bufsz, SERVER_SOCKET, dir);
-   else
-   snprintf(buffer, bufsz, CLIENT_SOCKET, dir, getpid(),
-   rte_sys_gettid());
-
-   return 0;
-}
-
-static int
-pdump_create_server_socket(void)
-{
-   int ret, socket_fd;
-   struct sockaddr_un addr;
-   socklen_t addr_len;
-
-   ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path),
-   RT

[dpdk-dev] [PATCH 0/4] allow procinfo and pdump on eth vdev

2018-03-04 Thread Jianfeng Tan
As we know, we have the following limitations in vdev:
  - dpdk-procinfo cannot get the stats of (most) vdev in primary process;
  - dpdk-pdump cannot dump the packets for (most) vdev in primary process;
  - secondary process cannot use (most) vdev in primary process.

The very first reason is that the secondary process actually does not know
the existence of those vdevs as vdevs are chained on a linked list, and
not shareable to secondary.

In this patch series, we would like to propose a vdev sharing model like this:
  - As a secondary process boots, all devices (including vdev) in primary
will be automatically shared. After both primary and secondary process
booted,
  - Device add/remove in primary will be translated to device hot plug/unplug
event in secondary processes. (TODO)
  - Device add in secondary
* If that kind of device supports multi-process, the secondary will
  request the primary to probe the device and the primary to share
  it to the secondary. It's not necessary to have secondary-private
  device in this case. (TODO)
* If that kind of device does not support multi-process, the secondary
  will probe the device by itself, and the port id is shared among
  all primary/secondary processes.

This patch series doesn't:
  - provide secondary data path (Rx/Tx) support for each specific vdev.

How to test:

Step 0: start testpmd with a vhost port; and a VM connected to the vhost port.

Step 1: try using dpdk-procinfo to get the stats.
 $(dpdk-procinfo) --log-level=8 --no-pci -- --stats

Step 2: try using dpdk-pdump to dump the packets.
 $(dpdk-pdump) -- --pdump 'port=0,queue=*,rx-dev=/tmp/rx.pcap'

Jianfeng Tan (4):
  eal: bring forward multi-process channel init
  bus/vdev: bus scan by multi-process channel
  drivers/net: do not allocate rte_eth_dev_data privately
  drivers/net: share vdev data to secondary process

 drivers/bus/vdev/Makefile |   1 +
 drivers/bus/vdev/vdev.c   | 110 ++
 drivers/net/af_packet/rte_eth_af_packet.c |  42 ++--
 drivers/net/bonding/rte_eth_bond_pmd.c|  13 
 drivers/net/failsafe/failsafe.c   |  14 
 drivers/net/kni/rte_eth_kni.c |  25 ---
 drivers/net/null/rte_eth_null.c   |  30 
 drivers/net/octeontx/octeontx_ethdev.c|  28 
 drivers/net/pcap/rte_eth_pcap.c   |  31 +
 drivers/net/softnic/rte_eth_softnic.c |  19 +-
 drivers/net/tap/rte_eth_tap.c |  22 +++---
 drivers/net/vhost/rte_eth_vhost.c |  34 -
 lib/librte_eal/bsdapp/eal/eal.c   |  23 ---
 lib/librte_eal/linuxapp/eal/eal.c |  23 ---
 14 files changed, 295 insertions(+), 120 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH 1/4] eal: bring forward multi-process channel init

2018-03-04 Thread Jianfeng Tan
Adjust the init sequence: put mp channel init before bus scan
so that we can init the vdev bus through mp channel in the
secondary process before the bus scan.

Signed-off-by: Jianfeng Tan 
---
 lib/librte_eal/bsdapp/eal/eal.c   | 23 +--
 lib/librte_eal/linuxapp/eal/eal.c | 23 +--
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 4eafcb5..b469382 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -544,6 +544,19 @@ rte_eal_init(int argc, char **argv)
return -1;
}
 
+   rte_config_init();
+
+   /* Put mp channel init before bus scan so that we can init the vdev
+* bus through mp channel in the secondary process before the bus scan.
+*/
+   if (rte_mp_channel_init() < 0) {
+   rte_eal_init_alert("failed to init mp channel\n");
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   rte_errno = EFAULT;
+   return -1;
+   }
+   }
+
if (rte_bus_scan()) {
rte_eal_init_alert("Cannot scan the buses for devices\n");
rte_errno = ENODEV;
@@ -583,16 +596,6 @@ rte_eal_init(int argc, char **argv)
 
rte_srand(rte_rdtsc());
 
-   rte_config_init();
-
-   if (rte_mp_channel_init() < 0) {
-   rte_eal_init_alert("failed to init mp channel\n");
-   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-   rte_errno = EFAULT;
-   return -1;
-   }
-   }
-
if (rte_eal_memory_init() < 0) {
rte_eal_init_alert("Cannot init memory\n");
rte_errno = ENOMEM;
diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
b/lib/librte_eal/linuxapp/eal/eal.c
index 4ca06f4..8914f91 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -769,6 +769,19 @@ rte_eal_init(int argc, char **argv)
return -1;
}
 
+   rte_config_init();
+
+   /* Put mp channel init before bus scan so that we can init the vdev
+* bus through mp channel in the secondary process before the bus scan.
+*/
+   if (rte_mp_channel_init() < 0) {
+   rte_eal_init_alert("failed to init mp channel\n");
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   rte_errno = EFAULT;
+   return -1;
+   }
+   }
+
if (rte_bus_scan()) {
rte_eal_init_alert("Cannot scan the buses for devices\n");
rte_errno = ENODEV;
@@ -815,8 +828,6 @@ rte_eal_init(int argc, char **argv)
 
rte_srand(rte_rdtsc());
 
-   rte_config_init();
-
if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) {
rte_eal_init_alert("Cannot init logging.");
rte_errno = ENOMEM;
@@ -824,14 +835,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
 
-   if (rte_mp_channel_init() < 0) {
-   rte_eal_init_alert("failed to init mp channel\n");
-   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-   rte_errno = EFAULT;
-   return -1;
-   }
-   }
-
 #ifdef VFIO_PRESENT
if (rte_eal_vfio_setup() < 0) {
rte_eal_init_alert("Cannot init VFIO\n");
-- 
2.7.4



[dpdk-dev] [PATCH 4/4] drivers/net: share vdev data to secondary process

2018-03-04 Thread Jianfeng Tan
dpdk-procinfo, as a secondary process, cannot fetch stats for vdev.

This patch enables that by attaching the port from the shared data.
We also fill in the eth dev ops, although only some ops work in the
secondary process, for example, stats_get().

Note that, we still cannot Rx/Tx packets on the ports which do not
support multi-process.

Reported-by: Vipin Varghese
Signed-off-by: Jianfeng Tan 
---
 drivers/net/af_packet/rte_eth_af_packet.c | 17 +++--
 drivers/net/bonding/rte_eth_bond_pmd.c| 13 +
 drivers/net/failsafe/failsafe.c   | 14 ++
 drivers/net/kni/rte_eth_kni.c | 12 
 drivers/net/null/rte_eth_null.c   | 13 +
 drivers/net/octeontx/octeontx_ethdev.c| 14 ++
 drivers/net/pcap/rte_eth_pcap.c   | 13 +
 drivers/net/softnic/rte_eth_softnic.c | 19 ---
 drivers/net/tap/rte_eth_tap.c | 13 +
 drivers/net/vhost/rte_eth_vhost.c | 17 +++--
 10 files changed, 138 insertions(+), 7 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c 
b/drivers/net/af_packet/rte_eth_af_packet.c
index 2db692f..970cf05 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -915,9 +915,22 @@ rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
int ret = 0;
struct rte_kvargs *kvlist;
int sockfd = -1;
+   struct rte_eth_dev *eth_dev;
+   const char *name = rte_vdev_device_name(dev);
+
+   RTE_LOG(INFO, PMD, "Initializing pmd_af_packet for %s\n", name);
 
-   RTE_LOG(INFO, PMD, "Initializing pmd_af_packet for %s\n",
-   rte_vdev_device_name(dev));
+   if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+   strlen(rte_vdev_device_args(dev)) == 0) {
+   eth_dev = rte_eth_dev_attach_secondary(name);
+   if (!eth_dev) {
+   RTE_LOG(ERR, PMD, "Failed to probe %s\n", name);
+   return -1;
+   }
+   /* TODO: request info from primary to set up Rx and Tx */
+   eth_dev->dev_ops = &ops;
+   return 0;
+   }
 
kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
if (kvlist == NULL) {
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index c34c325..7d6dea2 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -2994,6 +2994,7 @@ bond_probe(struct rte_vdev_device *dev)
uint8_t bonding_mode, socket_id/*, agg_mode*/;
int  arg_count, port_id;
uint8_t agg_mode;
+   struct rte_eth_dev *eth_dev;
 
if (!dev)
return -EINVAL;
@@ -3001,6 +3002,18 @@ bond_probe(struct rte_vdev_device *dev)
name = rte_vdev_device_name(dev);
RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
 
+   if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+   strlen(rte_vdev_device_args(dev)) == 0) {
+   eth_dev = rte_eth_dev_attach_secondary(name);
+   if (!eth_dev) {
+   RTE_LOG(ERR, PMD, "Failed to probe %s\n", name);
+   return -1;
+   }
+   /* TODO: request info from primary to set up Rx and Tx */
+   eth_dev->dev_ops = &default_dev_ops;
+   return 0;
+   }
+
kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
pmd_bond_init_valid_arguments);
if (kvlist == NULL)
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index c499bfb..ea9fdc6 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -294,10 +294,24 @@ static int
 rte_pmd_failsafe_probe(struct rte_vdev_device *vdev)
 {
const char *name;
+   struct rte_eth_dev *eth_dev;
 
name = rte_vdev_device_name(vdev);
INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s",
name);
+
+   if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+   strlen(rte_vdev_device_args(vdev)) == 0) {
+   eth_dev = rte_eth_dev_attach_secondary(name);
+   if (!eth_dev) {
+   RTE_LOG(ERR, PMD, "Failed to probe %s\n", name);
+   return -1;
+   }
+   /* TODO: request info from primary to set up Rx and Tx */
+   eth_dev->dev_ops = &failsafe_ops;
+   return 0;
+   }
+
return fs_eth_dev_create(vdev);
 }
 
diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
index 1a07089..24909c7 100644
--- a/drivers/net/kni/rte_eth_kni.c
+++ b/drivers/net/kni/rte_eth_kni.c
@@ -405,6 +405,18 @@ eth_kni_probe(struct rte_vdev_device *vdev)
params = rte_vdev_device_args(vdev);
RTE_LOG(INFO, PMD, "Initializing eth_kni f

[dpdk-dev] [PATCH 2/4] bus/vdev: bus scan by multi-process channel

2018-03-04 Thread Jianfeng Tan
To scan the vdevs in the primary, we send a request to the primary process
to obtain the names of the vdevs.

Only the name is shared from the primary. In probe(), the device
driver is supposed to locate (or request) more detailed
information from the primary.

Signed-off-by: Jianfeng Tan 
---
 drivers/bus/vdev/Makefile |   1 +
 drivers/bus/vdev/vdev.c   | 110 ++
 2 files changed, 111 insertions(+)

diff --git a/drivers/bus/vdev/Makefile b/drivers/bus/vdev/Makefile
index 24d424a..bd0bb89 100644
--- a/drivers/bus/vdev/Makefile
+++ b/drivers/bus/vdev/Makefile
@@ -10,6 +10,7 @@ LIB = librte_bus_vdev.a
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 # versioning export map
 EXPORT_MAP := rte_bus_vdev_version.map
diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c
index e4bc724..0a3ea52 100644
--- a/drivers/bus/vdev/vdev.c
+++ b/drivers/bus/vdev/vdev.c
@@ -314,6 +314,88 @@ rte_vdev_uninit(const char *name)
return 0;
 }
 
+struct vdev_param {
+#define VDEV_SCAN_REQ  1
+#define VDEV_SCAN_ONE  2
+#define VDEV_SCAN_REP  3
+   int type;
+   int num;
+   char name[RTE_DEV_NAME_MAX_LEN];
+};
+
+static int vdev_plug(struct rte_device *dev);
+
+static int
+vdev_action(const struct rte_mp_msg *mp_msg, const void *peer)
+{
+   struct rte_vdev_device *dev;
+   struct rte_devargs *devargs;
+   struct rte_mp_msg mp_resp;
+   struct vdev_param *ou = (struct vdev_param *)&mp_resp.param;
+   const struct vdev_param *in = (const struct vdev_param *)mp_msg->param;
+   const char *devname;
+   int num;
+
+   strcpy(mp_resp.name, "vdev");
+   mp_resp.len_param = sizeof(*ou);
+   mp_resp.num_fds = 0;
+
+   switch (in->type) {
+   case VDEV_SCAN_REQ:
+   ou->type = VDEV_SCAN_ONE;
+   ou->num = 1;
+   num = 0;
+   TAILQ_FOREACH(dev, &vdev_device_list, next) {
+   devname = rte_vdev_device_name(dev);
+   if (strlen(devname) == 0)
+   VDEV_LOG(INFO, "vdev with no name is not sent");
+   VDEV_LOG(INFO, "send vdev, %s", devname);
+   strncpy(ou->name, devname, RTE_DEV_NAME_MAX_LEN);
+   if (rte_mp_sendmsg(&mp_resp) < 0)
+   VDEV_LOG(ERR, "send vdev, %s, failed, %s",
+devname, strerror(rte_errno));
+   num++;
+   }
+   ou->type = VDEV_SCAN_REP;
+   ou->num = num;
+   if (rte_mp_reply(&mp_resp, peer) < 0)
+   VDEV_LOG(ERR, "Failed to reply a scan request");
+   break;
+   case VDEV_SCAN_ONE:
+   VDEV_LOG(INFO, "receive vdev, %s", in->name);
+   dev = find_vdev(in->name);
+   if (dev) {
+   VDEV_LOG(ERR, "vdev already exists: %s", in->name);
+   break;
+   }
+
+   devargs = alloc_devargs(in->name, NULL);
+   if (!devargs) {
+   VDEV_LOG(ERR, "failed to allocate memory");
+   break;
+   }
+
+   dev = calloc(1, sizeof(*dev));
+   if (!dev) {
+   VDEV_LOG(ERR, "failed to allocate memory");
+   free(devargs);
+   break;
+   }
+
+   dev->device.devargs = devargs;
+   dev->device.numa_node = 0; /* to be corrected in probe() */
+   dev->device.name = devargs->name;
+
+   TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
+   TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
+   break;
+   default:
+   VDEV_LOG(ERR, "vdev cannot recognize this message");
+   }
+
+   return 0;
+}
+
 static int
 vdev_scan(void)
 {
@@ -321,6 +403,34 @@ vdev_scan(void)
struct rte_devargs *devargs;
struct vdev_custom_scan *custom_scan;
 
+   if (rte_mp_action_register("vdev", vdev_action) < 0 &&
+   rte_errno != EEXIST) {
+   VDEV_LOG(ERR, "vdev fails to add action");
+   return -1;
+   }
+
+   if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+   struct rte_mp_msg mp_req, *mp_rep;
+   struct rte_mp_reply mp_reply;
+   struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+   struct vdev_param *req = (struct vdev_param *)mp_req.param;
+   struct vdev_param *resp;
+
+   strcpy(mp_req.name, "vdev");
+   mp_req.len_param = sizeof(*req);
+   mp_req.num_fds = 0;
+   req->type = VDEV_SCAN_REQ;
+   if (rte_mp_request(&mp_req, &mp_reply, &ts) == 0 &&
+   mp_reply.nb_received == 1) {
+   mp_rep = &mp_reply.msgs[0];
+  

[dpdk-dev] [PATCH 3/4] drivers/net: do not allocate rte_eth_dev_data privately

2018-03-04 Thread Jianfeng Tan
We introduced private rte_eth_dev_data to allow vdev to be created
both in primary process and secondary process(es). This is not
friendly to the multi-process model; for example, it leads to a port id
contention issue if two processes both find that the data entry is free.

And to get stats of primary vdev in secondary, we must allocate
from the pre-defined array so that we can find it.

Suggested-by: Bruce Richardson 
Signed-off-by: Jianfeng Tan 
---
 drivers/net/af_packet/rte_eth_af_packet.c | 25 +++--
 drivers/net/kni/rte_eth_kni.c | 13 ++---
 drivers/net/null/rte_eth_null.c   | 17 +++--
 drivers/net/octeontx/octeontx_ethdev.c| 14 ++
 drivers/net/pcap/rte_eth_pcap.c   | 18 +++---
 drivers/net/tap/rte_eth_tap.c |  9 +
 drivers/net/vhost/rte_eth_vhost.c | 17 ++---
 7 files changed, 20 insertions(+), 93 deletions(-)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c 
b/drivers/net/af_packet/rte_eth_af_packet.c
index 57eccfd..2db692f 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -564,25 +564,17 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
RTE_LOG(ERR, PMD,
"%s: no interface specified for AF_PACKET ethdev\n",
name);
-   goto error_early;
+   return -1;
}
 
RTE_LOG(INFO, PMD,
"%s: creating AF_PACKET-backed ethdev on numa socket %u\n",
name, numa_node);
 
-   /*
-* now do all data allocation - for eth_dev structure, dummy pci driver
-* and internal (private) data
-*/
-   data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
-   if (data == NULL)
-   goto error_early;
-
*internals = rte_zmalloc_socket(name, sizeof(**internals),
0, numa_node);
if (*internals == NULL)
-   goto error_early;
+   return -1;
 
for (q = 0; q < nb_queues; q++) {
(*internals)->rx_queue[q].map = MAP_FAILED;
@@ -604,24 +596,24 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
RTE_LOG(ERR, PMD,
"%s: I/F name too long (%s)\n",
name, pair->value);
-   goto error_early;
+   return -1;
}
if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) {
RTE_LOG(ERR, PMD,
"%s: ioctl failed (SIOCGIFINDEX)\n",
name);
-   goto error_early;
+   return -1;
}
(*internals)->if_name = strdup(pair->value);
if ((*internals)->if_name == NULL)
-   goto error_early;
+   return -1;
(*internals)->if_index = ifr.ifr_ifindex;
 
if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) {
RTE_LOG(ERR, PMD,
"%s: ioctl failed (SIOCGIFHWADDR)\n",
name);
-   goto error_early;
+   return -1;
}
memcpy(&(*internals)->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
 
@@ -775,14 +767,13 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 
(*internals)->nb_queues = nb_queues;
 
-   rte_memcpy(data, (*eth_dev)->data, sizeof(*data));
+   data = (*eth_dev)->data;
data->dev_private = *internals;
data->nb_rx_queues = (uint16_t)nb_queues;
data->nb_tx_queues = (uint16_t)nb_queues;
data->dev_link = pmd_link;
data->mac_addrs = &(*internals)->eth_addr;
 
-   (*eth_dev)->data = data;
(*eth_dev)->dev_ops = &ops;
 
return 0;
@@ -802,8 +793,6 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
}
free((*internals)->if_name);
rte_free(*internals);
-error_early:
-   rte_free(data);
return -1;
 }
 
diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
index dc4e65f..1a07089 100644
--- a/drivers/net/kni/rte_eth_kni.c
+++ b/drivers/net/kni/rte_eth_kni.c
@@ -337,25 +337,17 @@ eth_kni_create(struct rte_vdev_device *vdev,
struct pmd_internals *internals;
struct rte_eth_dev_data *data;
struct rte_eth_dev *eth_dev;
-   const char *name;
 
RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n",
numa_node);
 
-   name = rte_vdev_device_name(vdev);
-   data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
-   if (data == NULL)
-   return NULL;
-
/* reserve an ethdev entry */
eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*internals));
-   if (eth_dev == NULL) {
-   rte_free(data);
+   if (eth_dev == NULL)
return NULL;
-   }
 
internals = eth_dev->data->dev_private;
-   rte_memcpy(data, eth_d

Re: [dpdk-dev] [PATCH] usertools/dpdk-devbind.py: add support for avp device

2018-03-04 Thread Zhang, Xiaohua
Hi Yigit,
Should I create an updated one and send it out?

BR.
Xiaohua Zhang

-Original Message-
From: Ferruh Yigit [mailto:ferruh.yi...@intel.com] 
Sent: Friday, March 02, 2018 11:35 PM
To: Zhang, Xiaohua; dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH] usertools/dpdk-devbind.py: add support for avp 
device

On 2/26/2018 5:43 AM, Xiaohua Zhang wrote:
> Signed-off-by: Xiaohua Zhang 

Patch title should be:
usertools: add support for AVP device

Apart from that,

Acked-by: Ferruh Yigit 


Re: [dpdk-dev] 16.11.5 (LTS) patches review and test

2018-03-04 Thread gowrishankar muthukrishnan

Hi Luca,
To support i40e on powerpc, we would like the patch below to be merged:

c3def6a8724 net/i40e: implement vector PMD for altivec

I have verified br-16.11 with the above commit (in the cherry-pick, I needed
to remove the release notes, which were meant for the 17.05 release; I hope
that is fine here).
Could you please merge the above?

Thanks,
Gowrishankar

On Monday 26 February 2018 05:04 PM, Luca Boccassi wrote:

Hi all,

Here is a list of patches targeted for LTS release 16.11.5. Please
help review and test. The planned date for the final release is March
the 5th, pending results from regression tests.
Before that, please shout if anyone has objections with these
patches being applied.

These patches are located at branch 16.11 of dpdk-stable repo:
 http://dpdk.org/browse/dpdk-stable/

Thanks.

Luca Boccassi

---
Ajit Khaparde (6):
   net/bnxt: support new PCI IDs
   net/bnxt: parse checksum offload flags
   net/bnxt: fix group info usage
   net/bnxt: fix broadcast cofiguration
   net/bnxt: fix size of Tx ring in HW
   net/bnxt: fix link speed setting with autoneg off

Akhil Goyal (1):
   examples/ipsec-secgw: fix corner case for SPI value

Alejandro Lucero (3):
   net/nfp: fix MTU settings
   net/nfp: fix jumbo settings
   net/nfp: fix CRC strip check behaviour

Anatoly Burakov (14):
   memzone: fix leak on allocation error
   malloc: protect stats with lock
   malloc: fix end for bounded elements
   vfio: fix enabled check on error
   app/procinfo: add compilation option in config
   test: register test as failed if setup failed
   test/table: fix uninitialized parameter
   test/memzone: fix wrong test
   test/memzone: handle previously allocated memzones
   usertools/devbind: remove unused function
   test/reorder: fix memory leak
   test/ring_perf: fix memory leak
   test/table: fix memory leak
   test/timer_perf: fix memory leak

Andriy Berestovskyy (1):
   keepalive: fix state alignment

Bao-Long Tran (1):
   examples/ip_pipeline: fix timer period unit

Beilei Xing (8):
   net/i40e: fix flow director Rx resource defect
   net/i40e: add warnings when writing global registers
   net/i40e: add debug logs when writing global registers
   net/i40e: fix multiple driver support issue
   net/i40e: fix interrupt conflict when using multi-driver
   net/i40e: fix Rx interrupt
   net/i40e: check multi-driver option parsing
   app/testpmd: fix flow director filter

Chas Williams (1):
   net/bonding: fix setting slave MAC addresses

David Harton (1):
   net/i40e: fix VF reset stats crash

Didier Pallard (1):
   net/virtio: fix incorrect cast

Dustin Lundquist (1):
   examples/exception_path: align stats on cache line

Erez Ferber (1):
   net/mlx5: fix MTU update

Ferruh Yigit (1):
   kni: fix build with kernel 4.15

Fiona Trahe (1):
   crypto/qat: fix null auth algo overwrite

Gowrishankar Muthukrishnan (2):
   eal/ppc: remove the braces in memory barrier macros
   eal/ppc: support sPAPR IOMMU for vfio-pci

Harish Patil (2):
   net/qede: fix to reject config with no Rx queue
   net/qede/base: fix VF LRO tunnel configuration

Hemant Agrawal (4):
   pmdinfogen: fix cross compilation for ARM big endian
   lpm: fix ARM big endian build
   net/i40e: fix ARM big endian build
   net/ixgbe: fix ARM big endian build

Hyong Youb Kim (1):
   net/enic: fix crash due to static max number of queues

Igor Ryzhov (1):
   net/i40e: fix flag for MAC address write

Ilya V. Matveychikov (2):
   eal: update assertion macro
   mbuf: cleanup function to get last segment

Jerin Jacob (3):
   net/thunderx: fix multi segment Tx function return
   test/crypto: fix missing include
   ethdev: fix data alignment

Jerry Lilijun (1):
   net/bonding: fix activated slave in 8023ad mode

Jianfeng Tan (3):
   vhost: fix crash
   net/vhost: fix log messages on create/destroy
   net/virtio-user: fix start with kernel vhost

Junjie Chen (3):
   vhost: fix dequeue zero copy with virtio1
   examples/vhost: fix sending ARP packet to self
   vhost: fix mbuf free

Kefu Chai (1):
   contigmem: fix build on FreeBSD 12

Konstantin Ananyev (1):
   eal/x86: use lock-prefixed instructions for SMP barrier

Liang-Min Larry Wang (1):
   net/ixgbe: improve link state check on VF

Marko Kovacevic (2):
   mk: support renamed Makefile in external project
   mk: fix external build

Markus Theil (2):
   igb_uio: fix IRQ disable on recent kernels
   igb_uio: fix MSI-X IRQ assignment with new IRQ function

Matan Azrad (2):
   app/testpmd: fix port index in RSS forward config
   app/testpmd: fix port topology in RSS forward config

Matej Vido (1):
   net/szedata2: fix check of mmap return value

Maxime Coquelin (1):
   net/virtio: fix resuming port with Rx vector path

Michael McConville (1):
  

Re: [dpdk-dev] [PATCH 1/4] vhost: move fdset functions from fd_man.c to fd_man.h

2018-03-04 Thread Yang, Zhiyong


> -----Original Message-----
> From: Thomas Monjalon [mailto:tho...@monjalon.net]
> Sent: Thursday, March 1, 2018 10:14 PM
> To: Tan, Jianfeng 
> Cc: Maxime Coquelin ; Yang, Zhiyong
> ; dev@dpdk.org; y...@fridaylinux.org; Bie, Tiwei
> ; Wang, Zhihong ; Wang,
> Dong1 
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 01/03/2018 07:02, Tan, Jianfeng:
> > From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
> > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
> > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > >>>lib/librte_vhost/Makefile |   3 +-
> > > >>>lib/librte_vhost/fd_man.c | 274 
> > > >>> ---
> ---
> > > >>>lib/librte_vhost/fd_man.h | 258
> > > >> +--
> > > >>>3 files changed, 253 insertions(+), 282 deletions(-)
> > > >>>delete mode 100644 lib/librte_vhost/fd_man.c
> > > >>
> > > >> I disagree with the patch.
> > > >> It is a good thing to reuse the code, but to do it, you need to
> > > >> extend the vhost lib API.
> > > >>
> > > >> New API need to be prefixed with rte_vhost_, and be declared in
> > > >> rte_vhost.h.
> > > >>
> > > >> And no need to move the functions from the .c to the .h file, as
> > > >> it
> > > moreover
> > > >> makes you inline them, which is not necessary here.
> > > >
> > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > >
> > > > I considered to do it as you said. However I still preferred this one 
> > > > at last.
> > > > Here are my reasons.
> > > > 1) As far as I know, this set of functions are used privately in
> > > > librte_vhost
> > > before this feature.
> > > > No strong request from the perspective of DPDK application. If I
> > > understand well,  It is enough to expose the functions to all PMDs
> > > > And it is better to keep internal use in DPDK.
> > >
> > > But what the patch is doing is adding fd_man.h to the API, without
> > > doing it properly. fd_man.h will be installed with other header
> > > files, and any external application can use it.
> > >
> > > >
> > > > 2) These functions help to implement vhost user, but they are not
> > > > strongly
> > > related to other APIs of vhost user which have already exposed.
> > > > if we want to expose them as APIs at lib layer, many functions and
> > > > related
> > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > Your opinion?
> > >
> > > Yes, it is not really vhost-related, it could be part of a more
> > > generic library. It is maybe better to duplicate these lines, or to
> > > move this code in a existing or new library.
> >
> > I vote to move it to a generic library, maybe eal. poll() has better
> > compatibility even though poll() is not as performant as epoll().
> >
> > Thomas, what do you think?
> 
> I don't see why it should be exported outside of DPDK, except for PMDs.
> I would tend to keep it internal but I understand that it would mean
> duplicating some code, which is not ideal.
> Please could you show what would be the content of the .h in EAL?
> 

If they need to be exposed in eal.h,
I think it should be the whole fdset mechanism, as follows.

typedef void (*fd_cb)(int fd, void *dat, int *remove);

/* One slot of an fdset: a file descriptor, its callbacks and its context. */
struct fdentry {
int fd; /* file descriptor; -1 indicates this entry is empty */
fd_cb rcb;  /* callback invoked when this fd is readable */
fd_cb wcb;  /* callback invoked when this fd is writable */
void *dat;  /* fd context; presumably handed to rcb/wcb as 'dat' - confirm */
int busy;   /* whether this entry is currently being used in a callback */
};

/*
 * A fixed-capacity set of file descriptors and their callbacks.
 * NOTE(review): rwfds[] and fd[] are both sized MAX_FDS and are presumably
 * indexed in parallel; the implementation is not shown here - confirm.
 */
struct fdset {
struct pollfd rwfds[MAX_FDS]; /* pollfd slots; presumably the array given to poll() - confirm */
struct fdentry fd[MAX_FDS]; /* per-descriptor entries (callbacks and context) */
pthread_mutex_t fd_mutex; /* presumably guards the members of this fdset - confirm */
int num;/* current number of fds in this fdset */
};

void fdset_init(struct fdset *pfdset);(not used in the patchset)

int fdset_add(struct fdset *pfdset, int fd,
fd_cb rcb, fd_cb wcb, void *dat); (used in this patchset)

void *fdset_del(struct fdset *pfdset, int fd); (not used in the patchset)

void *fdset_event_dispatch(void *arg);   (used in this patchset)

It seems that we have 4 options:
1) Expose them in librte_vhost.
2) Expose them in another existing or new library, for example EAL.
3) Duplicate the code at the PMD layer.
4) Do it the way this patch does.

thanks
Zhiyong