[dpdk-dev] [PATCH FIX-OPTION-2 1/2] eal: use named memzone to store user mempool ops name

2018-02-02 Thread Hemant Agrawal
The new mbuf pool ops name API uses the named memzone to store different
types of configured mempool ops name. It is better to also save the user
configured mempool ops name in named memzone. This way the best mempool
ops name can easily get the user configured mempool ops name for its
decisions.

This will also avoid the need to maintain an EAL API for default mempool
ops name.

Signed-off-by: Hemant Agrawal 
---
 lib/librte_eal/bsdapp/eal/Makefile |  1 +
 lib/librte_eal/bsdapp/eal/eal.c|  5 +++
 lib/librte_eal/common/eal_common_mempool.c | 50 ++
 lib/librte_eal/common/eal_private.h| 12 +++
 lib/librte_eal/common/meson.build  |  1 +
 lib/librte_eal/linuxapp/eal/Makefile   |  1 +
 lib/librte_eal/linuxapp/eal/eal.c  |  5 +++
 7 files changed, 75 insertions(+)
 create mode 100644 lib/librte_eal/common/eal_common_mempool.c

diff --git a/lib/librte_eal/bsdapp/eal/Makefile 
b/lib/librte_eal/bsdapp/eal/Makefile
index dd455e6..f07dace 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -38,6 +38,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_mempool.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 1622a41..2c33a60 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -621,6 +621,11 @@ rte_eal_init(int argc, char **argv)
return -1;
}
 
+   if (rte_eal_mempool_ops_config() < 0) {
+   rte_eal_init_alert("Cannot config user Mempool Ops\n");
+   return -1;
+   }
+
eal_check_mem_on_local_socket();
 
eal_thread_init_master(rte_config.master_lcore);
diff --git a/lib/librte_eal/common/eal_common_mempool.c 
b/lib/librte_eal/common/eal_common_mempool.c
new file mode 100644
index 000..1d10a75
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_mempool.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+/* init mempool ops */
+int
+rte_eal_mempool_ops_config(void)
+{
+   RTE_LOG(DEBUG, EAL, "Configuring user mempool ops name...\n");
+
+   /* secondary processes don't need to initialise anything */
+   if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+   return 0;
+
+   if (internal_config.user_mbuf_pool_ops_name) {
+   const struct rte_memzone *mz;
+
+   mz = rte_memzone_lookup("mbuf_user_pool_ops");
+   if (mz == NULL) {
+   mz = rte_memzone_reserve("mbuf_user_pool_ops",
+   32, SOCKET_ID_ANY, 0);
+   if (mz == NULL)
+   return -rte_errno;
+   }
+
+   strncpy(mz->addr, internal_config.user_mbuf_pool_ops_name,
+   strlen(internal_config.user_mbuf_pool_ops_name));
+   }
+
+   return 0;
+}
diff --git a/lib/librte_eal/common/eal_private.h 
b/lib/librte_eal/common/eal_private.h
index 0b28770..76beaff 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -205,4 +205,16 @@ struct rte_bus *rte_bus_find_by_device_name(const char 
*str);
 
 int rte_mp_channel_init(void);
 
+/**
+ * Mempool ops configuration
+ *
+ * This function is private to EAL.
+ *
+ * Set the user defined mempool ops in the named memzone area.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int rte_eal_mempool_ops_config(void);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/meson.build 
b/lib/librte_eal/common/meson.build
index 82b8910..3c6125b 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -16,6 +16,7 @@ common_sources = files(
'eal_common_lcore.c',
'eal_common_log.c',
'eal_common_memory.c',
+   'eal_common_mempool.c',
'eal_common_memzone.c',
'eal_common_options.c',
'eal_common_proc.c',
diff --git a/lib/librte_eal/linuxapp/eal/Makefile 
b/lib/librte_eal/linuxapp/eal/Makefile
index 7e5bbe8..83a08b0 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -46,6 +46,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c

[dpdk-dev] [PATCH FIX-OPTION-1] mbuf: fix the logic of user mempool ops API

2018-02-02 Thread Hemant Agrawal
From: Nipun Gupta 

The existing rte_eal_mbuf_default mempool ops can return the compile time
default ops name if the user has not provided command line inputs for
mempool ops name. It will break the logic of best mempool ops as it will
never return platform hw mempool ops.

This patch introduces a new API to just return the user mempool ops only.

Fixes: 8b0f7f434132 ("mbuf: maintain user and compile time mempool ops name")

Signed-off-by: Nipun Gupta 
---
 lib/librte_eal/bsdapp/eal/eal.c |  7 +++
 lib/librte_eal/common/include/rte_eal.h | 12 
 lib/librte_eal/linuxapp/eal/eal.c   |  7 +++
 lib/librte_eal/rte_eal_version.map  |  1 +
 lib/librte_mbuf/rte_mbuf_pool_ops.c |  2 +-
 5 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 1622a41..4eafcb5 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -82,6 +82,13 @@ struct internal_config internal_config;
 /* used by rte_rdtsc() */
 int rte_cycles_vmware_tsc_map;
 
+/* Return user provided mbuf pool ops name */
+const char * __rte_experimental
+rte_eal_mbuf_user_pool_ops(void)
+{
+   return internal_config.user_mbuf_pool_ops_name;
+}
+
 /* Return mbuf pool ops name */
 const char *
 rte_eal_mbuf_default_mempool_ops(void)
diff --git a/lib/librte_eal/common/include/rte_eal.h 
b/lib/librte_eal/common/include/rte_eal.h
index 08c6637..044474e 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -452,6 +452,18 @@ static inline int rte_gettid(void)
 enum rte_iova_mode rte_eal_iova_mode(void);
 
 /**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get user provided pool ops name for mbuf
+ *
+ * @return
+ *   returns user provided pool ops name.
+ */
+const char * __rte_experimental
+rte_eal_mbuf_user_pool_ops(void);
+
+/**
  * Get default pool ops name for mbuf
  *
  * @return
diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
b/lib/librte_eal/linuxapp/eal/eal.c
index 451fdaf..38306bf 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -92,6 +92,13 @@ struct internal_config internal_config;
 /* used by rte_rdtsc() */
 int rte_cycles_vmware_tsc_map;
 
+/* Return user provided mbuf pool ops name */
+const char * __rte_experimental
+rte_eal_mbuf_user_pool_ops(void)
+{
+   return internal_config.user_mbuf_pool_ops_name;
+}
+
 /* Return mbuf pool ops name */
 const char *
 rte_eal_mbuf_default_mempool_ops(void)
diff --git a/lib/librte_eal/rte_eal_version.map 
b/lib/librte_eal/rte_eal_version.map
index 4146907..2e6cbe9 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -220,6 +220,7 @@ EXPERIMENTAL {
rte_eal_devargs_remove;
rte_eal_hotplug_add;
rte_eal_hotplug_remove;
+   rte_eal_mbuf_user_pool_ops;
rte_mp_action_register;
rte_mp_action_unregister;
rte_mp_sendmsg;
diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.c 
b/lib/librte_mbuf/rte_mbuf_pool_ops.c
index 385fc43..48cc342 100644
--- a/lib/librte_mbuf/rte_mbuf_pool_ops.c
+++ b/lib/librte_mbuf/rte_mbuf_pool_ops.c
@@ -74,7 +74,7 @@ rte_mbuf_user_mempool_ops(void)
 
mz = rte_memzone_lookup("mbuf_user_pool_ops");
if (mz == NULL)
-   return rte_eal_mbuf_default_mempool_ops();
+   return rte_eal_mbuf_user_pool_ops();
return mz->addr;
 }
 
-- 
2.7.4



[dpdk-dev] [PATCH] doc: remove eal API for default mempool ops name

2018-02-02 Thread Hemant Agrawal
Signed-off-by: Hemant Agrawal 
---
 doc/guides/rel_notes/deprecation.rst | 9 +
 1 file changed, 9 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index d59ad59..a2b391c 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -8,6 +8,15 @@ API and ABI deprecation notices are to be posted here.
 Deprecation Notices
 ---
 
+* eal: a new set of mbuf mempool ops name APIs for user, platform and best
+  mempool names have been defined in ``rte_mbuf`` in v18.02. The uses of
+  ``rte_eal_mbuf_default_mempool_ops`` shall be replaced by
+  ``rte_mbuf_best_mempool_ops``.
+  The following function is now redundant and it is target to be deprecated in 
+  18.05:
+
+  - ``rte_eal_mbuf_default_mempool_ops``
+  
 * eal: several API and ABI changes are planned for ``rte_devargs`` in v18.02.
   The format of device command line parameters will change. The bus will need
   to be explicitly stated in the device declaration. The enum ``rte_devtype``
-- 
2.7.4



[dpdk-dev] [PATCH FIX-OPTION-2 2/2] mbuf: fix user mempool ops get to use only named memzone

2018-02-02 Thread Hemant Agrawal
The eal default mempool ops API can also return the compile
time default mempool ops name, which can break the best mempool
ops name logic.

Fixes: a3acc3144a76 ("mbuf: add pool ops selection functions")

Signed-off-by: Hemant Agrawal 
---
 lib/librte_mbuf/rte_mbuf_pool_ops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.c 
b/lib/librte_mbuf/rte_mbuf_pool_ops.c
index 385fc43..a636c28 100644
--- a/lib/librte_mbuf/rte_mbuf_pool_ops.c
+++ b/lib/librte_mbuf/rte_mbuf_pool_ops.c
@@ -74,7 +74,7 @@ rte_mbuf_user_mempool_ops(void)
 
mz = rte_memzone_lookup("mbuf_user_pool_ops");
if (mz == NULL)
-   return rte_eal_mbuf_default_mempool_ops();
+   return NULL;
return mz->addr;
 }
 
-- 
2.7.4



Re: [dpdk-dev] [PATCH] event/rx_adapter: fix ignore return of event start

2018-02-02 Thread Varghese, Vipin


> -Original Message-
> From: Jerin Jacob [mailto:jerin.ja...@caviumnetworks.com]
> Sent: Wednesday, January 31, 2018 6:54 AM
> To: Rao, Nikhil 
> Cc: Varghese, Vipin ; dev@dpdk.org; Jacob, Jerin
> ; Van Haaren, Harry
> ; Hemant Agrawal ;
> Jain, Deepak K 
> Subject: Re: [PATCH] event/rx_adapter: fix ignore return of event start
> 
> -Original Message-
> >
> >
> > Adding eventdev PMD folks for their suggestions on how to handle the return
> value from rte_event_dev_start() below.
> >
> > > -Original Message-
> > > From: Varghese, Vipin
> > > Sent: Wednesday, January 31, 2018 4:26 AM
> > > To: dev@dpdk.org; Rao, Nikhil 
> > > Cc: Jain, Deepak K ; Varghese, Vipin
> > > 
> > > Subject: [PATCH] event/rx_adapter: fix ignore return of event start
> > >
> > > Capture the return value for rte_event_dev_start. Return the result
> > > back to user.
> > >
> > > Coverity issue: 257000
> > > Fixes: 9c38b704d280 ("eventdev: add eth Rx adapter implementation")
> > > Cc: nikhil@intel.com
> > >
> > > Signed-off-by: Vipin Varghese 
> > > ---
> > >  lib/librte_eventdev/rte_event_eth_rx_adapter.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > index 90106e6..a818bef 100644
> > > --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > @@ -603,7 +603,7 @@ static uint16_t gcd_u16(uint16_t a, uint16_t b)
> > >   RTE_EDEV_LOG_ERR("failed to configure event dev %u\n",
> > >   dev_id);
> > >   if (started)
> > > - rte_event_dev_start(dev_id);
> > > + ret = rte_event_dev_start(dev_id);
> >
> > Currently the a non-zero return value at this point signifies an error 
> > returned
> from rte_event_dev_configure(),  so I suggest that the return value is 
> typecasted
> to void.
> 
> If I understand it correctly, Any one of the failure(configure() or start()) 
> should
> result in bad state. Right?
> i.e If some reason PMD is not able to start() even after failure 
> configuration()
> would result in bad state.
> If so, one option could be combine the error like ret |= operation or create a
> new logical error in Rx adapter which denotes this new error.
> 

So do we agree to ACK these changes to get the code fix to the mainline? Then 
rework the logic as required?

> >
> > >   return ret;
> > >   }
> > >
> > > @@ -617,7 +617,7 @@ static uint16_t gcd_u16(uint16_t a, uint16_t b)
> > >   conf->event_port_id = port_id;
> > >   conf->max_nb_rx = 128;
> > >   if (started)
> > > - rte_event_dev_start(dev_id);
> > > + ret = rte_event_dev_start(dev_id);
> > This change looks good to me.
> >
> > >   rx_adapter->default_cb_arg = 1;
> > >   return ret;
> > >  }
> > > --
> > > 1.9.1
> >


Re: [dpdk-dev] [PATCH] net/i40e: update Rx checksum offload

2018-02-02 Thread Zhang, Helin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Beilei Xing
> Sent: Wednesday, January 31, 2018 11:34 AM
> To: Zhang, Qi Z
> Cc: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] net/i40e: update Rx checksum offload
> 
> HW supports outer IP Rx checksum offload, this patch updates Rx checksum
> offload for PF and VF.
> 
> Signed-off-by: Beilei Xing 
Acked-by: Helin Zhang 


[dpdk-dev] XL710: [Q] traffic steering under DPDK.

2018-02-02 Thread Arkady Gilinsky
Hi,

Basically we are trying to configure packet steering to specific VF
according to L2 filter on XL710 Intel NIC adapter. We need to support
steering by MAC address and VLAN VID.
We do not need perfect match filtering; a packet is supposed to be sent to a
specific VF when its MAC address or VLAN VID matches the configured values. See
pseudocode:
“
If(packet->mac == expected_mac_vf_1 || packet->vlan.vid ==
expected_vlan_vf_1) {
sendToVf_1(packet);
return;
}
…
“
Typically in production multiple MACs and VLANs will be associated with the
same VF. There could be several MACs (tens) and many VLANs (hundreds) in our
system.
We are using kernel PF + DPDK VF.

- Can we somehow configure XL710 to classify packet by MAC and VLAN
separately (OR instead of AND)? (We have a working code that did MAC AND
VLAN.)
- What mechanism is supposed to be used to configure L2 steering? VEB (Virtual
Ethernet Bridge) or Flow Director?
- i40evf driver from DPDK does not allow us to disable “CRC stripping”
feature.
We cannot set “hw_strip_crc” to “0”. See “i40e_ethdev_vf.c:1593”:
“
/* For non-DPDK PF drivers, VF has no ability to disable HW
* CRC strip, and is implicitly enabled by the PF.
*/
if (!conf->rxmode.hw_strip_crc) {
vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
if ((vf->version_major == I40E_VIRTCHNL_VERSION_MAJOR) &&
(vf->version_minor <= I40E_VIRTCHNL_VERSION_MINOR)) {
/* Peer is running non-DPDK PF driver. */
PMD_INIT_LOG(ERR, "VF can't disable HW CRC Strip");
return -EINVAL;
}
}
“
- We see that VF strips VLAN from packet. Can we preserve tag in the packet?
Following DPDK API setting does not help: “port_conf.rxmode.hw_vlan_strip =
0”.

Please advise.
=
Best regards
Arkady Gilinsky.


Re: [dpdk-dev] [PATCH] net/i40e: update Rx checksum offload

2018-02-02 Thread Zhang, Helin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Zhang, Helin
> Sent: Friday, February 2, 2018 4:14 PM
> To: Xing, Beilei; Zhang, Qi Z
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/i40e: update Rx checksum offload
> 
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Beilei Xing
> > Sent: Wednesday, January 31, 2018 11:34 AM
> > To: Zhang, Qi Z
> > Cc: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH] net/i40e: update Rx checksum offload
> >
> > HW supports outer IP Rx checksum offload, this patch updates Rx
> > checksum offload for PF and VF.
> >
> > Signed-off-by: Beilei Xing 
> Acked-by: Helin Zhang 
Applied to dpdk-next-net-intel, thanks!

/Helin


[dpdk-dev] [PATCH v2] doc: remove eal API for default mempool ops name

2018-02-02 Thread Hemant Agrawal
Signed-off-by: Hemant Agrawal 
---
v2: fix checkpatch errors

 doc/guides/rel_notes/deprecation.rst | 9 +
 1 file changed, 9 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index d59ad59..c7d8f25 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -8,6 +8,15 @@ API and ABI deprecation notices are to be posted here.
 Deprecation Notices
 ---
 
+* eal: a new set of mbuf mempool ops name APIs for user, platform and best
+  mempool names have been defined in ``rte_mbuf`` in v18.02. The uses of
+  ``rte_eal_mbuf_default_mempool_ops`` shall be replaced by
+  ``rte_mbuf_best_mempool_ops``.
+  The following function is now redundant and it is target to be deprecated in
+  18.05:
+
+  - ``rte_eal_mbuf_default_mempool_ops``
+
 * eal: several API and ABI changes are planned for ``rte_devargs`` in v18.02.
   The format of device command line parameters will change. The bus will need
   to be explicitly stated in the device declaration. The enum ``rte_devtype``
-- 
2.7.4



[dpdk-dev] [PATCH] vhost: unlink existing file for server mode

2018-02-02 Thread Zhiyong Yang
Vhost-user startup in server mode will fail if the specified
socket file already exists. This patch calls unlink() to remove
any existing file before binding the socket.

Cc: y...@fridaylinux.org
Cc: maxime.coque...@redhat.com

Signed-off-by: Zhiyong Yang 
---
 lib/librte_vhost/socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 6e3857e7a..324a24f4e 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -315,6 +315,7 @@ vhost_user_start_server(struct vhost_user_socket *vsocket)
int fd = vsocket->socket_fd;
const char *path = vsocket->path;
 
+   unlink(path);
ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
-- 
2.13.3



Re: [dpdk-dev] [PATCH] doc: announce ABI change for crypto info struct

2018-02-02 Thread De Lara Guarch, Pablo
Hi Shally,

> -Original Message-
> From: Verma, Shally [mailto:shally.ve...@cavium.com]
> Sent: Tuesday, January 30, 2018 11:54 AM
> To: De Lara Guarch, Pablo ; Akhil Goyal
> ; Trahe, Fiona ;
> hemant.agra...@nxp.com; Doherty, Declan ;
> Griffin, John ; Jain, Deepak K
> ; j...@semihalf.com; t...@semihalf.com;
> d...@marvell.com; nsams...@marvell.com; jianbo@arm.com; Jacob,
> Jerin ; Athreya, Narayana Prasad
> ; Murthy, Nidadavolu
> 
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] doc: announce ABI change for crypto info
> struct
> 
> 
> 
> >-Original Message-
> >From: De Lara Guarch, Pablo [mailto:pablo.de.lara.gua...@intel.com]
> >Sent: 30 January 2018 16:51
> >To: Verma, Shally ; Akhil Goyal
> >; Trahe, Fiona ;
> >hemant.agra...@nxp.com; Doherty, Declan ;
> >Griffin, John ; Jain, Deepak K
> >; j...@semihalf.com; t...@semihalf.com;
> >d...@marvell.com; nsams...@marvell.com; jianbo@arm.com; Jacob,
> >Jerin ; Athreya, Narayana Prasad
> >; Murthy, Nidadavolu
> >
> >Cc: dev@dpdk.org
> >Subject: RE: [dpdk-dev] [PATCH] doc: announce ABI change for crypto
> >info struct
> >
> >Hi Shally/Ahkil,
> >
> >> -Original Message-
> >> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Verma, Shally
> >> Sent: Tuesday, January 30, 2018 7:56 AM
> >> To: Akhil Goyal ; De Lara Guarch, Pablo
> >> ; Trahe, Fiona
> >> ; hemant.agra...@nxp.com; Doherty, Declan
> >> ; Griffin, John ;
> >> Jain, Deepak K ; j...@semihalf.com;
> >> t...@semihalf.com; d...@marvell.com; nsams...@marvell.com;
> >> jianbo@arm.com; Jacob, Jerin
> >> ; Athreya, Narayana Prasad
> >> ; Murthy, Nidadavolu
> >> 
> >> Cc: dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH] doc: announce ABI change for crypto
> >> info struct
> >>
> >> I do see current cryptodev unit testcase (inside \test dir) uses
> >> info.sym.max_nb_sessions param for session mempool_create. So, such
> >> testcases change are also in proposal?
> >
> >Yes, for these tests, we can just define a macro in the tests, instead of
> using the info structure.
> 
> [Shally] Ok, then you mean applications will choose any random number
> during mempool_create and not dependent on device max_nb_sessions?

Yes, actually for the unit tests, even one session is enough.

> 
> >>
> >> Another point, we recently submitted an RFC patch on lib/cryptodev
> >> with asymmetric crypto support
> >> (https://dpdk.org/dev/patchwork/patch/34308/) which is awaiting
> >> review and these fields have role to play there.
> >> So, could this change be please viewed in conjunction with asym RFC?
> >
> >Do you need it for asymmetric? Anyway, this would remove the
> symmetric function and structures, not applicable for you.
> 
> [Shally] I would say addition of asym in lib/cryptodev is not entirely
> standalone, specifically for PMDs that can support both.
> My key concern are max_nb_sessions_per_qp and related
> qp_attach_sym/asym APIs which enable management of queue distribution
> among sym and asym in current proposal, specifically, for PMDs that can
> support both but have dedicated qp for each. Right now proposal is open
> for feedback and would prefer to be covered before sym related changes
> could be applied.

Actually, I have been thinking about this. Given the time we have until 18.02 
is out,
and that this is not urgent to be applied (this is just code cleanup),
I am postponing this until next release. 

My other reason is that the info structure has a rte_pci_device pointer which 
should be removed.
However, I believe it is better to leave it for the next release and discuss it
with other libraries which have this, like ethdev.

Thanks,
Pablo



Re: [dpdk-dev] [PATCH] mk: add debug target

2018-02-02 Thread Thomas Monjalon
01/02/2018 18:04, Ferruh Yigit:
> On 1/30/2018 11:40 PM, Thomas Monjalon wrote:
> > 16/12/2017 01:13, Ferruh Yigit:
> >> Add "debug" target to build library with debug symbols and optimization
> >> disabled.
> >>
> >> This is shortcut for exiting method to compile with
> >> EXTRA_CFLAGS="-O0 -g3"
> >>
> >> Signed-off-by: Ferruh Yigit 
> >> ---
> >> +  debugbuild library with debug symbols
> > [...]
> >> +.PHONY: debug
> >> +debug:
> >> +  $(Q)$(MAKE) EXTRA_CFLAGS="-O0 -g3"
> > 
> > This target will override any EXTRA_CFLAGS,
> > so we cannot build in debug mode with more extra cflags this way.
> 
> How about EXTRA_CFLAGS+="-O0 -g3"
> which is not override EXTRA_CFLAGS but merge them, can be ok?

I am not sure it works. Have you tested?


Re: [dpdk-dev] [PATCH v3 1/6] test: fix memory leak in bitmap test

2018-02-02 Thread Thomas Monjalon
01/02/2018 18:04, Burakov, Anatoly:
> On 01-Feb-18 12:10 AM, Thomas Monjalon wrote:
> > 17/01/2018 12:15, Anatoly Burakov:
> >> Acked-by: Cristian Dumitrescu 
> >>
> >> Fixes: c7e4a134e769 ("test: verify bitmap operations")
> >> Cc: pbhagavat...@caviumnetworks.com
> >>
> >> Signed-off-by: Anatoly Burakov 
> > 
> > I think you missed to report some previous acks in this series.
> > 
> > 
> > 
> 
> Which ones were there? I can only see two new acks for v3 from Olivier. 
> Everything else seems in order, unless my email client is lying to me :)

There are some acks from Cristian.


Re: [dpdk-dev] [PATCH] net/tap: allow user MAC to be passed as args

2018-02-02 Thread Pascal Mazon
Hi,

You didn't address my request about not using a global value. Was there
a good reason?

I paste it here again as a reminder:

  Can you also not use a global value for user_mac, but instead change the
  last argument for eth_dev_tap_create():
  Use directly a char mac[ETHER_ADDR_LEN], automatic variable from
  rte_pmd_tap_probe().
  In set_mac_type(), you can check either for "fixed" or a correct custom
  mac address.
  Then eth_dev_tap_create() can check if the provided mac is empty (!fixed
  and !custom_mac), to generate a random one.

Additional comments inline.

Best regards,
Pascal

On 31/01/2018 19:22, Vipin Varghese wrote:
> Allow TAP PMD to pass user desired MAC address as argument.
> The argument value is processed as string, where each 2 bytes
> are converted to HEX MAC address after validation.
>
> Signed-off-by: Vipin Varghese 
> ---
>  doc/guides/nics/tap.rst   |  6 +
>  drivers/net/tap/rte_eth_tap.c | 62 
> +++
>  2 files changed, 63 insertions(+), 5 deletions(-)
>
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> index dc6f834..6b083c8 100644
> --- a/doc/guides/nics/tap.rst
> +++ b/doc/guides/nics/tap.rst
> @@ -69,6 +69,12 @@ for each interface string containing ``mac=fixed``. The 
> MAC address is formatted
>  as 00:'d':'t':'a':'p':[00-FF]. Convert the characters to hex and you get the
>  actual MAC address: ``00:64:74:61:70:[00-FF]``.
>  
> +   --vdev=net_tap0,mac="00:64:74:61:70:11"
> +
> +The MAC address will have a user value passed as string. The MAC address is 
> in
> +format with delimeter ``:``. The string is byte converted to hex and you get
> +the actual MAC address: ``00:64:74:61:70:11``.
> +
>  It is possible to specify a remote netdevice to capture packets from by 
> adding
>  ``remote=foo1``, for example::
>  
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 29d6356..3489b04 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -49,7 +49,14 @@
>  #define ETH_TAP_MAC_ARG "mac"
>  #define ETH_TAP_MAC_FIXED   "fixed"
>  
> +#define ETH_TAP_MAC_STR_FXD 1
> +#define ETH_TAP_MAC_STR_USR 2
> +#define ETH_TAP_USR_MAC_FMT "xx:xx:xx:xx:xx:xx"
> +#define ETH_TAP_CMP_MAC_FMT "0123456789ABCDEFabcdef"
> +#define ETH_TAP_MAC_ARG_FMT "["ETH_TAP_MAC_FIXED "|" 
> ETH_TAP_USR_MAC_FMT"]"
> +
>  static struct rte_vdev_driver pmd_tap_drv;
> +static unsigned char user_mac[ETHER_ADDR_LEN];
>  
>  static const char *valid_arguments[] = {
>   ETH_TAP_IFACE_ARG,
> @@ -1397,13 +1404,20 @@ enum ioctl_mode {
>   pmd->txq[i].fd = -1;
>   }
>  
> - if (fixed_mac_type) {
> + if (fixed_mac_type == ETH_TAP_MAC_STR_FXD) {
>   /* fixed mac = 00:64:74:61:70: */
>   static int iface_idx;
>   char mac[ETHER_ADDR_LEN] = "\0dtap";
>  
>   mac[ETHER_ADDR_LEN - 1] = iface_idx++;
>   rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
> + } else if (fixed_mac_type == ETH_TAP_MAC_STR_USR) {
> + RTE_LOG(INFO, PMD,
> + "%s; user MAC (%02x:%02x:%02x:%02x:%02x:%02x) 
> argument\n",
Shouldn't it be a colon there? "%s:"
> + pmd->name,
> + user_mac[0], user_mac[1], user_mac[2],
> + user_mac[3], user_mac[4], user_mac[5]);
> + rte_memcpy(&pmd->eth_addr, user_mac, ETHER_ADDR_LEN);
>   } else {
>   eth_random_addr((uint8_t *)&pmd->eth_addr);
>   }
> @@ -1577,10 +1591,48 @@ enum ioctl_mode {
>const char *value,
>void *extra_args)
>  {
> - if (value &&
> - !strncasecmp(ETH_TAP_MAC_FIXED, value, strlen(ETH_TAP_MAC_FIXED)))
> - *(int *)extra_args = 1;
> + char mac_temp[20] = {0}, *mac_byte = NULL;
Instead of hardcoded values, I'd use
mac_temp[strlen(ETH_TAP_USR_MAC_FMT) + 1]
> + unsigned int index = 0;
> +
> + if (!value)
> + return 0;
> +
> + if (!strncasecmp(ETH_TAP_MAC_FIXED, value,
> + strlen(ETH_TAP_MAC_FIXED))) {
> + *(int *)extra_args = ETH_TAP_MAC_STR_FXD;
> + goto success;
> + }
> +
> + if (strlen(value) == 17) {
And here 17 => strlen(ETH_TAP_USR_MAC_FMT)
> + strncpy(mac_temp, value, 18);
> + mac_temp[19] = '\0';
Instead of those two lines, I'd rather have snprintf(mac_temp,
sizeof(mac_temp), "%s", value).
It handles the trailing \0 nicely.
> + mac_byte = strtok(mac_temp, ":");
> +
> + while ((mac_byte != NULL) &&
> + strspn(mac_byte, ETH_TAP_CMP_MAC_FMT) &&
> + strspn((mac_byte + 1), ETH_TAP_CMP_MAC_FMT) &&
> + strlen(mac_byte) == 2) {
> + user_mac[index] = strtoul(mac_byte, NULL, 16);
> + mac_byte = strtok(NULL, ":");
> + inde

Re: [dpdk-dev] [PATCH] net/tap: allow user MAC to be passed as args

2018-02-02 Thread Varghese, Vipin
Hi Pascal,

Sincere apologies, I think I missed this because a rework was requested. Please
find my answers to the comments inline.

> -Original Message-
> From: Pascal Mazon [mailto:pascal.ma...@6wind.com]
> Sent: Friday, February 2, 2018 9:16 AM
> To: Varghese, Vipin ; dev@dpdk.org
> Cc: Yigit, Ferruh ; Jain, Deepak K
> 
> Subject: Re: [PATCH] net/tap: allow user MAC to be passed as args
> 
> Hi,
> 
> You didn't address my request about not using a global value. Was there a good
> reason?
> 
> I paste it here again as a reminder:
> 
>   Can you also not use a global value for user_mac, but instead change the
>   last argument for eth_dev_tap_create():
>   Use directly a char mac[ETHER_ADDR_LEN], automatic variable from
>   rte_pmd_tap_probe().
>   In set_mac_type(), you can check either for "fixed" or a correct custom
>   mac address.
>   Then eth_dev_tap_create() can check if the provided mac is empty (!fixed
>   and !custom_mac), to generate a random one.

The last argument for eth_dev_tap_create is 'int fixed_mac_type'. Would you like
me to change this to 'uint64_t fixed_mac_type' to accommodate the MAC address?

Note: Should we change the API arguments?

> 
> Additional comments inline.
> 
> Best regards,
> Pascal
> 
> On 31/01/2018 19:22, Vipin Varghese wrote:




> >  #define ETH_TAP_MAC_ARG "mac"
> >  #define ETH_TAP_MAC_FIXED   "fixed"
> >
> > +#define ETH_TAP_MAC_STR_FXD 1
> > +#define ETH_TAP_MAC_STR_USR 2
> > +#define ETH_TAP_USR_MAC_FMT "xx:xx:xx:xx:xx:xx"
> > +#define ETH_TAP_CMP_MAC_FMT "0123456789ABCDEFabcdef"
> > +#define ETH_TAP_MAC_ARG_FMT "["ETH_TAP_MAC_FIXED "|"
> ETH_TAP_USR_MAC_FMT"]"
> > +
> >  static struct rte_vdev_driver pmd_tap_drv;
> > +static unsigned char user_mac[ETHER_ADDR_LEN];
> >
> >  static const char *valid_arguments[] = {
> > ETH_TAP_IFACE_ARG,
> > @@ -1397,13 +1404,20 @@ enum ioctl_mode {
> > pmd->txq[i].fd = -1;
> > }
> >
> > -   if (fixed_mac_type) {
> > +   if (fixed_mac_type == ETH_TAP_MAC_STR_FXD) {
> > /* fixed mac = 00:64:74:61:70: */
> > static int iface_idx;
> > char mac[ETHER_ADDR_LEN] = "\0dtap";
> >
> > mac[ETHER_ADDR_LEN - 1] = iface_idx++;
> > rte_memcpy(&pmd->eth_addr, mac, ETHER_ADDR_LEN);
> > +   } else if (fixed_mac_type == ETH_TAP_MAC_STR_USR) {
> > +   RTE_LOG(INFO, PMD,
> > +   "%s; user MAC (%02x:%02x:%02x:%02x:%02x:%02x)
> argument\n",
> Shouldn't it be a colon there? "%s:"

Ok, I can make this change.



> > +   char mac_temp[20] = {0}, *mac_byte = NULL;
> Instead of hardcoded values, I'd use
> mac_temp[strlen(ETH_TAP_USR_MAC_FMT) + 1]

Ok, I can make this change.



> > +
> > +   if (strlen(value) == 17) {
> And here 17 => strlen(ETH_TAP_USR_MAC_FMT)

Ok

> > +   strncpy(mac_temp, value, 18);
> > +   mac_temp[19] = '\0';
> Instead of those two lines, I'd rather have snprintf(mac_temp,
> sizeof(mac_temp), "%s", value).
> It handles the trailing \0 nicely.

OK, I will check the same.

> > +   mac_byte = strtok(mac_temp, ":");





Re: [dpdk-dev] [PATCH] event/rx_adapter: fix ignore return of event start

2018-02-02 Thread Rao, Nikhil

> -Original Message-
> From: Varghese, Vipin
> Sent: Friday, February 2, 2018 1:39 PM
> To: Jerin Jacob ; Rao, Nikhil
> 
> Cc: dev@dpdk.org; Jacob, Jerin ; Van
> Haaren, Harry ; Hemant Agrawal
> ; Jain, Deepak K 
> Subject: RE: [PATCH] event/rx_adapter: fix ignore return of event start
> 
> 
> 
> > -Original Message-
> > From: Jerin Jacob [mailto:jerin.ja...@caviumnetworks.com]
> > Sent: Wednesday, January 31, 2018 6:54 AM
> > To: Rao, Nikhil 
> > Cc: Varghese, Vipin ; dev@dpdk.org; Jacob,
> > Jerin ; Van Haaren, Harry
> > ; Hemant Agrawal
> ;
> > Jain, Deepak K 
> > Subject: Re: [PATCH] event/rx_adapter: fix ignore return of event
> > start
> >
> > -Original Message-
> > >
> > >
> > > Adding eventdev PMD folks for their suggestions on how to handle the
> > > return
> > value from rte_event_dev_start() below.
> > >
> > > > -Original Message-
> > > > From: Varghese, Vipin
> > > > Sent: Wednesday, January 31, 2018 4:26 AM
> > > > To: dev@dpdk.org; Rao, Nikhil 
> > > > Cc: Jain, Deepak K ; Varghese, Vipin
> > > > 
> > > > Subject: [PATCH] event/rx_adapter: fix ignore return of event
> > > > start
> > > >
> > > > Capture the return value for rte_event_dev_start. Return the
> > > > result back to user.
> > > >
> > > > Coverity issue: 257000
> > > > Fixes: 9c38b704d280 ("eventdev: add eth Rx adapter
> > > > implementation")
> > > > Cc: nikhil@intel.com
> > > >
> > > > Signed-off-by: Vipin Varghese 
> > > > ---
> > > >  lib/librte_eventdev/rte_event_eth_rx_adapter.c | 4 ++--
> > > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > > b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > > index 90106e6..a818bef 100644
> > > > --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > > +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > > @@ -603,7 +603,7 @@ static uint16_t gcd_u16(uint16_t a, uint16_t b)
> > > > RTE_EDEV_LOG_ERR("failed to configure event dev %u\n",
> > > > dev_id);
> > > > if (started)
> > > > -   rte_event_dev_start(dev_id);
> > > > +   ret = rte_event_dev_start(dev_id);
> > >
> > > Currently a non-zero return value at this point signifies an
> > > error returned
> > from rte_event_dev_configure(),  so I suggest that the return value is
> > typecasted to void.
> >
> > If I understand it correctly, Any one of the failure(configure() or
> > start()) should result in bad state. Right?
> > i.e If some reason PMD is not able to start() even after failure
> > configuration() would result in bad state.
> > If so, one option could be combine the error like ret |= operation or
> > create a new logical error in Rx adapter which denotes this new error.
> >
> 
> So do we agree to ACK these changes to get the code fix to the mainline? 

Sorry if my original email wasn't clear: if rte_event_dev_configure() returns
an error and rte_event_dev_start() returns success, that would be a problem,
i.e., the fix is incorrect.

Of the 2 options suggested by Jerin: since ret is not a bitmask, ret |=
wouldn't work, if I understand the option correctly. A new error would work.

How about EIO? We would also update the documentation to indicate that the event
device would be in a stopped state if the return code is EIO.

> rework the logic as required?
> 
> > >
> > > > return ret;
> > > > }
> > > >
> > > > @@ -617,7 +617,7 @@ static uint16_t gcd_u16(uint16_t a, uint16_t b)
> > > > conf->event_port_id = port_id;
> > > > conf->max_nb_rx = 128;
> > > > if (started)
> > > > -   rte_event_dev_start(dev_id);
> > > > +   ret = rte_event_dev_start(dev_id);
> > > This change looks good to me.
> > >
> > > > rx_adapter->default_cb_arg = 1;
> > > > return ret;
> > > >  }
> > > > --
> > > > 1.9.1
> > >


Re: [dpdk-dev] [PATCH V14 1/3] eal: add uevent monitor api and callback func

2018-02-02 Thread Guo, Jia



On 1/31/2018 8:44 AM, Stephen Hemminger wrote:

On Tue, 30 Jan 2018 20:20:58 +0800
Jeff Guo  wrote:


+   memset(&ep_kernel, 0, sizeof(struct epoll_event));
+   ep_kernel.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+   ep_kernel.data.fd = netlink_fd;
+   if (epoll_ctl(fd_ep, EPOLL_CTL_ADD, netlink_fd,
+   &ep_kernel) < 0) {
+   RTE_LOG(ERR, EAL, "error addding fd to epoll: %m\n");
+   goto out;
+   }
+
+   while (!service_exit) {
+   int fdcount;
+   struct epoll_event ev[1];
+
+   fdcount = epoll_wait(fd_ep, ev, 1, -1);
+   if (fdcount < 0) {
+   if (errno != EINTR)
+   RTE_LOG(ERR, EAL, "error receiving uevent "
+   "message: %m\n");
+   continue;
+   }
+
+   /* epoll_wait has at least one fd ready to read */
+   if (dev_uev_process(ev, fdcount) < 0) {
+   if (errno != EINTR)
+   RTE_LOG(ERR, EAL, "error processing uevent "
+   "message: %m\n");
+   }
+   }

What is the point of the extra epoll here?
Why not just make netlink_fd blocking and do recv?
Rather than having two syscalls per event.
If the device event monitor only monitors a netlink fd, you might be right that
there is no need for the extra epoll. Let me think about whether it needs to be
kept for future extension or can just be made simpler. Thanks, Stephen.




Re: [dpdk-dev] [PATCH] doc: announce ABI change for crypto info struct

2018-02-02 Thread Verma, Shally


>-Original Message-
>From: De Lara Guarch, Pablo [mailto:pablo.de.lara.gua...@intel.com]
>Sent: 02 February 2018 14:38
>To: Verma, Shally ; Akhil Goyal 
>; Trahe, Fiona ;
>hemant.agra...@nxp.com; Doherty, Declan ; Griffin, 
>John ; Jain, Deepak K
>; j...@semihalf.com; t...@semihalf.com; 
>d...@marvell.com; nsams...@marvell.com;
>jianbo@arm.com; Jacob, Jerin ; 
>Athreya, Narayana Prasad
>; Murthy, Nidadavolu 
>
>Cc: dev@dpdk.org
>Subject: RE: [dpdk-dev] [PATCH] doc: announce ABI change for crypto info struct
>
>Hi Shally,
>
>> -Original Message-
>> From: Verma, Shally [mailto:shally.ve...@cavium.com]
>> Sent: Tuesday, January 30, 2018 11:54 AM
>> To: De Lara Guarch, Pablo ; Akhil Goyal
>> ; Trahe, Fiona ;
>> hemant.agra...@nxp.com; Doherty, Declan ;
>> Griffin, John ; Jain, Deepak K
>> ; j...@semihalf.com; t...@semihalf.com;
>> d...@marvell.com; nsams...@marvell.com; jianbo@arm.com; Jacob,
>> Jerin ; Athreya, Narayana Prasad
>> ; Murthy, Nidadavolu
>> 
>> Cc: dev@dpdk.org
>> Subject: RE: [dpdk-dev] [PATCH] doc: announce ABI change for crypto info
>> struct
>>
>>
>>
>> >-Original Message-
>> >From: De Lara Guarch, Pablo [mailto:pablo.de.lara.gua...@intel.com]
>> >Sent: 30 January 2018 16:51
>> >To: Verma, Shally ; Akhil Goyal
>> >; Trahe, Fiona ;
>> >hemant.agra...@nxp.com; Doherty, Declan ;
>> >Griffin, John ; Jain, Deepak K
>> >; j...@semihalf.com; t...@semihalf.com;
>> >d...@marvell.com; nsams...@marvell.com; jianbo@arm.com; Jacob,
>> >Jerin ; Athreya, Narayana Prasad
>> >; Murthy, Nidadavolu
>> >
>> >Cc: dev@dpdk.org
>> >Subject: RE: [dpdk-dev] [PATCH] doc: announce ABI change for crypto
>> >info struct
>> >
>> >Hi Shally/Ahkil,
>> >
>> >> -Original Message-
>> >> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Verma, Shally
>> >> Sent: Tuesday, January 30, 2018 7:56 AM
>> >> To: Akhil Goyal ; De Lara Guarch, Pablo
>> >> ; Trahe, Fiona
>> >> ; hemant.agra...@nxp.com; Doherty, Declan
>> >> ; Griffin, John ;
>> >> Jain, Deepak K ; j...@semihalf.com;
>> >> t...@semihalf.com; d...@marvell.com; nsams...@marvell.com;
>> >> jianbo@arm.com; Jacob, Jerin
>> >> ; Athreya, Narayana Prasad
>> >> ; Murthy, Nidadavolu
>> >> 
>> >> Cc: dev@dpdk.org
>> >> Subject: Re: [dpdk-dev] [PATCH] doc: announce ABI change for crypto
>> >> info struct
>> >>
>> >> I do see current cryptodev unit testcase (inside \test dir) uses
>> >> info.sym.max_nb_sessions param for session mempool_create. So, such
>> >> testcases change are also in proposal?
>> >
>> >Yes, for these tests, we can just define a macro in the tests, instead of
>> using the info structure.
>>
>> [Shally] Ok, then you mean applications will choose any random number
>> during mempool_create and not dependent on device max_nb_sessions?
>
>Yes, actually for the unit tests, even one session is enough.
>
>>
>> >>
>> >> Another point, we recently submitted an RFC patch on lib/cryptodev
>> >> with asymmetric crypto support
>> >> (https://dpdk.org/dev/patchwork/patch/34308/) which is awaiting
>> >> review and these fields have role to play there.
>> >> So, could this change be please viewed in conjunction with asym RFC?
>> >
>> >Do you need it for asymmetric? Anyway, this would remove the
>> symmetric function and structures, not applicable for you.
>>
>> [Shally] I would say addition of asym in lib/cryptodev is not entirely
>> standalone, specifically for PMDs that can support both.
>> My key concern are max_nb_sessions_per_qp and related
>> qp_attach_sym/asym APIs which enable management of queue distribution
>> among sym and asym in current proposal, specifically, for PMDs that can
>> support both but have dedicated qp for each. Right now proposal is open
>> for feedback and would prefer to be covered before sym related changes
>> could be applied.
>
>Actually, I have been thinking about this. Given the time we have until 18.02 
>is out,
>and that this is not urgent to be applied (this is just code cleanup),
>I am postponing this until next release.
>
[Shally] Ok. Thanks for acknowledging this.

>My other reason is that the info structure has a rte_pci_device pointer which 
>should be removed.
>However, I believe it is better to leave it for next release and discuss it 
>with other libraries which has this, like ethdev.
>
>Thanks,
>Pablo



Re: [dpdk-dev] [PATCH] net/i40e: fix VF testpmd startup failure issue

2018-02-02 Thread Xing, Beilei


> -Original Message-
> From: Li, Xiaoyun
> Sent: Friday, February 2, 2018 1:45 PM
> To: Xing, Beilei 
> Cc: dev@dpdk.org; Li, Xiaoyun ; sta...@dpdk.org
> Subject: [PATCH] net/i40e: fix VF testpmd startup failure issue
> 
> New testpmd will get CRC strip offload from rx_offload_capa. I40evf cannot
> disable CRC strip. And in fact, it is enabled by PF. This patch solves the 
> issue
> by adding CRC strip flag into rx_offload_capa in i40e and i40evf.
> 
> Fixes: 8b9bd0efe0b6 ("app/testpmd: disable Rx VLAN offloads by default")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Xiaoyun Li 
> ---
>  drivers/net/i40e/i40e_ethdev.c| 3 ++-
>  drivers/net/i40e/i40e_ethdev_vf.c | 3 ++-
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 7e3d1a8..403831d 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -3083,7 +3083,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct
> rte_eth_dev_info *dev_info)
>   DEV_RX_OFFLOAD_QINQ_STRIP |
>   DEV_RX_OFFLOAD_IPV4_CKSUM |
>   DEV_RX_OFFLOAD_UDP_CKSUM |
> - DEV_RX_OFFLOAD_TCP_CKSUM;
> + DEV_RX_OFFLOAD_TCP_CKSUM |
> + DEV_RX_OFFLOAD_CRC_STRIP;
>   dev_info->tx_offload_capa =
>   DEV_TX_OFFLOAD_VLAN_INSERT |
>   DEV_TX_OFFLOAD_QINQ_INSERT |
> diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
> b/drivers/net/i40e/i40e_ethdev_vf.c
> index 57f7613..169e1b1 100644
> --- a/drivers/net/i40e/i40e_ethdev_vf.c
> +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> @@ -2194,7 +2194,8 @@ i40evf_dev_info_get(struct rte_eth_dev *dev,
> struct rte_eth_dev_info *dev_info)
>   DEV_RX_OFFLOAD_QINQ_STRIP |
>   DEV_RX_OFFLOAD_IPV4_CKSUM |
>   DEV_RX_OFFLOAD_UDP_CKSUM |
> - DEV_RX_OFFLOAD_TCP_CKSUM;
> + DEV_RX_OFFLOAD_TCP_CKSUM |
> + DEV_RX_OFFLOAD_CRC_STRIP;
>   dev_info->tx_offload_capa =
>   DEV_TX_OFFLOAD_VLAN_INSERT |
>   DEV_TX_OFFLOAD_QINQ_INSERT |
> --
> 2.7.4

Acked-by: Beilei Xing , thanks.



Re: [dpdk-dev] [PATCH v3 1/6] test: fix memory leak in bitmap test

2018-02-02 Thread Burakov, Anatoly

On 02-Feb-18 9:08 AM, Thomas Monjalon wrote:

01/02/2018 18:04, Burakov, Anatoly:

On 01-Feb-18 12:10 AM, Thomas Monjalon wrote:

17/01/2018 12:15, Anatoly Burakov:

Acked-by: Cristian Dumitrescu 

Fixes: c7e4a134e769 ("test: verify bitmap operations")
Cc: pbhagavat...@caviumnetworks.com

Signed-off-by: Anatoly Burakov 


I think you missed to report some previous acks in this series.





Which ones were there? I can only see two new acks for v3 from Olivier.
Everything else seems in order, unless my email client is lying to me :)


There are some acks from Cristian.


They are all already in the patchset.

--
Thanks,
Anatoly


Re: [dpdk-dev] [PATCH v2 0/5] Fix meson build on FreeBSD

2018-02-02 Thread Bruce Richardson
On Thu, Feb 01, 2018 at 02:20:06PM +, Bruce Richardson wrote:
> There are a few issues with building DPDK for FreeBSD using the
> meson build system, specifically:
> * the kernel modules aren't compiling due to an incorrect VPATH
> * a number of unit tests depend on libraries not supported on BSD
> * applications and examples need to be linked with execinfo library.
> 
> 
> V2: merged patch 6 in with patch 2, since it's the same fix for main apps
> and for the examples.
> 
> Bruce Richardson (5):
>   eal/bsdapp: fix building kernel modules
>   build: fix dependency on execinfo for BSD meson builds
>   test/test: mark tests as skipped when required lib not available
>   test/test: fix dependency on power lib for BSD meson build
>   test/test: fix dependency on KNI lib for BSD meson build
> 
>  app/test-eventdev/meson.build   |  1 +
>  app/test-pmd/meson.build|  1 +
>  examples/meson.build|  3 ++-
>  lib/librte_eal/bsdapp/BSDmakefile.meson |  1 +
>  lib/librte_eal/meson.build  |  1 -
>  test/test/meson.build   |  8 +++-
>  test/test/test_kni.c| 13 +
>  test/test/test_power.c  | 12 
>  test/test/test_power_acpi_cpufreq.c | 11 +++
>  test/test/test_power_kvm_vm.c   | 11 +++
>  10 files changed, 59 insertions(+), 3 deletions(-)
> 
> -- 
Applied to dpdk-next-build

/Bruce


Re: [dpdk-dev] IXGBE, IOMMU DMAR DRHD handling fault issue

2018-02-02 Thread Burakov, Anatoly

On 01-Feb-18 7:26 PM, Ravi Kerur wrote:



On Thu, Feb 1, 2018 at 2:10 AM, Burakov, Anatoly 
mailto:anatoly.bura...@intel.com>> wrote:


On 31-Jan-18 9:51 PM, Ravi Kerur wrote:


Hi Anatoly,

Thanks. I am following wiki link below which uses vIOMMU with
DPDK as a use-case and instantiate VM as specified with Q35
chipset in Qemu.

https://wiki.qemu.org/Features/VT-d


Qemu-version is 2.11
Host kernel 4.9
Guest kernel 4.4

I can only guess that guest kernel needs an upgrade in my setup
to work correctly, if versions on my setup rings a bell on not
having support kindly let me know.

When 'modprobe vfio enable_unsafe_noiommu_node=Y' is executed on
guest I get following error
...
vfio: unknown parameter 'enable_unsafe_noiommu_node' ignored
...

in guest.

Thanks.


AFAIK kernel 4.4 should have noiommu mode - it was introduced in
3.1x days. However, in order for that to work, kernel also has to be
built with this mode enabled. My guess is, whoever is the supplier
of your kernel, did not do that. You should double-check the kernel
configuration of your distribution.

However, if you have vIOMMU in QEMU, you shouldn't need noiommu mode
- "regular" vfio should work fine. noiommu mode should only be
needed if you know you don't have IOMMU enabled in your kernel, and
even if you can't enable it, you can still use igb_uio.

Hi Anatoly,

Do you suggest I take this discussion to kvm/qemu mailing list as I am 
not sure which component has the issue? I check dmesg for BIOS physical 
memory map and address reported as fault by DMAR is reported by BIOS as 
usable on both host and vm.


[ 4539.597737] DMAR: [DMA Read] Request device [04:10.0] fault addr 
*33a128000 *[fault reason 06] PTE Read access is not set


dmesg | grep BIOS
[    0.00] e820: BIOS-provided physical RAM map:
[    0.00] BIOS-e820: [mem 0x-0x0009afff] usable
[    0.00] BIOS-e820: [mem 0x0009b000-0x0009] 
reserved
[    0.00] BIOS-e820: [mem 0x000e-0x000f] 
reserved

[    0.00] BIOS-e820: [mem 0x0010-0x7938afff] usable
[    0.00] BIOS-e820: [mem 0x7938b000-0x7994bfff] 
reserved
[    0.00] BIOS-e820: [mem 0x7994c000-0x7999cfff] 
ACPI data
[    0.00] BIOS-e820: [mem 0x7999d000-0x79f7dfff] 
ACPI NVS
[    0.00] BIOS-e820: [mem 0x79f7e000-0x7bd37fff] 
reserved

[    0.00] BIOS-e820: [mem 0x7bd38000-0x7bd38fff] usable
[    0.00] BIOS-e820: [mem 0x7bd39000-0x7bdbefff] 
reserved

[    0.00] BIOS-e820: [mem 0x7bdbf000-0x7bff] usable
[    0.00] BIOS-e820: [mem 0x7c00-0x8fff] 
reserved
[    0.00] BIOS-e820: [mem 0xfed1c000-0xfed44fff] 
reserved
[    0.00] BIOS-e820: [mem 0xff00-0x] 
reserved
[*    0.00] BIOS-e820: [mem 0x0001-0x00407fff] 
usable*

*
*
Kindly let me know your inputs.

Thanks.


-- 
Thanks,

Anatoly




The "PTE Read not set" error usually indicates that you are trying to 
use a non-IOMMU method when you have IOMMU enabled (i.e. trying to use 
igb_uio when IOMMU is on). That, to me, indicates that you do have IOMMU 
emulation enabled.


I would go about it this way.

First, i'd ensure that your VM has IOMMU emulation enabled and working. 
You have mentioned that your QEMU version should have IOMMU emulation, 
so let's assume that's the case.


I am not sure of the exact command-line needed to activate the vIOMMU 
emulation, but assuming your VM emulates an Intel processor, your kernel 
command-line should have "iommu=on intel_iommu=on" in it. Check 
/etc/default/grub for GRUB_CMDLINE_LINUX_DEFAULT value, and if the above 
values are not in there, add the above changes, do "update-grub" and 
reboot your VM.


If it already did have the necessary kernel configuration, do "dmesg | 
grep IOMMU" and look for "IOMMU Enabled". That should tell you that 
IOMMU is enabled and working in the kernel.


After that, you can modprobe vfio and vfio-pci, bind NICs to it, and it 
should be working. Please bear in mind that all of that is how i 
would've gone about it if i had similar problems on baremetal, but i'm 
hoping all of it is applicable to VM's. So, either disable IOMMU and use 
igb_uio, or enable IOMMU and use VFIO. Both will work.


--
Thanks,
Anatoly


Re: [dpdk-dev] [RFC v3] Compression API in DPDK :SW ZLIB PMD

2018-02-02 Thread Verma, Shally
HI Fiona 

>-Original Message-
>From: Verma, Shally
>Sent: 12 January 2018 19:24
>To: 'Trahe, Fiona' ; dev@dpdk.org
>Cc: Gupta, Ashish ; Sahu, Sunila 
>; Challa, Mahipal
>; Athreya, Narayana Prasad 
>; De Lara Guarch, Pablo
>; Jain, Deepak K ; 
>Roy Pledge ; Youri Querry
>; Hemant Agrawal ; Ahmed 
>Mansour ; De Lara
>Guarch, Pablo 
>Subject: RE: [RFC v3] Compression API in DPDK :SW ZLIB PMD
>
>Hi Fiona
>
>> I think a common draft repo would be the best way forward. Let's talk to the 
>> maintainers about getting one set up.
>Sure. Please share us details when it is available. I assume it will be 
>created once API spec move from RFC to 1st version?
>
[Shally] Any update on this? We're available with sample SW ZLIB based 
implementation (currently proof-concepted for stateless only) . So, if any repo 
is available, we can upload it there.

>Thanks
>Shally
>
>From: Trahe, Fiona [mailto:fiona.tr...@intel.com]
>Sent: 12 January 2018 00:31
>To: Verma, Shally ; dev@dpdk.org
>Cc: Gupta, Ashish ; Sahu, Sunila 
>; Challa, Mahipal
>; Athreya, Narayana Prasad 
>; De Lara Guarch, Pablo
>; Jain, Deepak K ; 
>Roy Pledge ; Youri Querry
>; Hemant Agrawal ; Ahmed 
>Mansour ; Trahe,
>Fiona ; De Lara Guarch, Pablo 
>
>Subject: RE: [RFC v3] Compression API in DPDK :SW ZLIB PMD
>
>Hi Shally,
>
>
>From: Verma, Shally [mailto:shally.ve...@cavium.com]
>Sent: Wednesday, January 10, 2018 8:33 AM
>To: Trahe, Fiona ; mailto:dev@dpdk.org
>Cc: Gupta, Ashish ; Sahu, Sunila 
>; Challa, Mahipal
>; Athreya, Narayana Prasad 
>; De Lara Guarch,
>Pablo ; Jain, Deepak K 
>; Roy Pledge
>; Youri Querry ; 
>Hemant Agrawal
>; Ahmed Mansour 
>Subject: [RFC v3] Compression API in DPDK :SW ZLIB PMD
>
>Hi Fiona
>
>We are planning to implement ZLIB based SW PMD to proof-concept DPDK 
>compression RFC v3 API spec internally. However, would
>like to check
>If you're working upon similar in parallel and if yes, then what's your 
>development roadmap / strategy so that we could see if we
>could leverage joint effort.
>Depending upon your feedback, we can see if we can have some common repo for 
>joint development or send it as RFC patch.
>
>Let me know your opinion on same.
>[Fiona] We have not started a zlib based SW PMD, and would be delighted if you 
>would do this, I agree it's a
>good way to prove out the API.
>We are writing some unit tests against the API, we would be happy to share 
>these with you, and continue to develop these
>jointly so the same test sets can be targeted against all PMDs.
>I think a common draft repo would be the best way forward. Let's talk to the 
>maintainers about getting one set up.
>
>Thanks
>Shally



[dpdk-dev] [PATCH 0/7] vhost: support selective datapath

2018-02-02 Thread Zhihong Wang
This patch set introduces support for selective datapath in DPDK vhost-user
lib. vDPA stands for vhost Data Path Acceleration. The idea is to enable
various types of virtio-compatible devices to do data transfer with virtio
driver directly to enable acceleration.

The default datapath is the existing software implementation, more options
will be available when new engines are added.

Design details


An engine is a group of virtio-compatible devices. The definition of engine
is as follows:

struct rte_vdpa_eng_addr {
union {
uint8_t __dummy[64];

struct {
struct rte_pci_addr pci_addr;
};
};
};

struct rte_vdpa_eng_info {
char name[MAX_VDPA_NAME_LEN];
struct rte_vdpa_eng_addr *addr;
};

struct rte_vdpa_dev_ops {
vdpa_dev_conf_t    dev_conf;
vdpa_dev_close_t   dev_close;
vdpa_vring_state_set_t vring_state_set;
vdpa_feature_set_t feature_set;
vdpa_migration_done_t  migration_done;
};

struct rte_vdpa_eng_ops {
vdpa_eng_init_t   eng_init;
vdpa_eng_uninit_t eng_uninit;
vdpa_info_query_t info_query;
};

struct rte_vdpa_eng_driver {
const char *name;
struct rte_vdpa_eng_ops eng_ops;
struct rte_vdpa_dev_ops dev_ops;
} __rte_cache_aligned;

struct rte_vdpa_engine {
struct rte_vdpa_eng_info    eng_info;
struct rte_vdpa_eng_driver *eng_drv;
} __rte_cache_aligned;

A set of engine ops is defined in rte_vdpa_eng_ops for engine init, uninit,
and attributes reporting. The attributes are defined as follows:

struct rte_vdpa_eng_attr {
uint64_t features;
uint64_t protocol_features;
uint32_t queue_num;
uint32_t dev_num;
};

A set of device ops is defined in rte_vdpa_dev_ops for each virtio device
in the engine to do device specific operations.

Changes to the current vhost-user lib are:


 1. Make vhost device capabilities configurable to adopt various engines.
Such capabilities include supported features, protocol features, queue
number. APIs are introduced to let app configure these capabilities.

 2. In addition to the existing vhost framework, a set of callbacks is
added for vhost to call the driver for device operations at the right
time:

 a. dev_conf: Called to configure the actual device when the virtio
device becomes ready.

 b. dev_close: Called to close the actual device when the virtio device
is stopped.

 c. vring_state_set: Called to change the state of the vring in the
actual device when vring state changes.

 d. feature_set: Called to set the negotiated features to device.

 e. migration_done: Called to allow the device to respond to RARP
sending.

 3. To make vhost aware of its own type, an engine id (eid) and a device
id (did) are added into the vhost data structure to identify the actual
device. APIs are introduced to let app configure them. When the default
software datapath is used, eid and did are set to -1. When alternative
datapath is used, eid and did are set by app to specify which device to
use. Each vhost-user socket can have only 1 connection in this case.

Working process:


 1. Register driver during DPDK initialization.

 2. Register engine with driver name and address.

 3. Get engine attributes.

 4. For vhost device creation:

  a. Register vhost-user socket.

  b. Set eid and did of the vhost-user socket.

  c. Set attributes of the vhost-user socket.

  d. Register vhost-user callbacks.

  e. Start to wait for connection.

 5. When connection comes and virtio device data structure is negotiated,
configure the device with all needed info.

Zhihong Wang (7):
  vhost: make capabilities configurable
  vhost: expose vhost feature definitions
  vhost: support selective datapath
  vhost: add apis for datapath configuration
  vhost: adapt vhost lib for selective datapath
  vhost: get callfd before device setup
  vhost: expose new apis

 lib/librte_vhost/Makefile  |   4 +-
 lib/librte_vhost/rte_vdpa.h| 119 +++
 lib/librte_vhost/rte_vhost.h   | 136 +++
 lib/librte_vhost/rte_vhost_version.map |  18 
 lib/librte_vhost/socket.c  | 145 +
 lib/librte_vhost/vdpa.c| 125 
 lib/librte_vhost/vhost.c   |  49 +++
 lib/librte_vhost/vhost.h   |  14 +++-
 lib/librte_vhost/vhost_user.c  | 108 +++-
 lib/librte_vhost/vhost_user.h  |  20 ++---
 10 files changed, 700 insertions(+), 38 deletions(-)
 create mode 100644 lib/librte_vhost/rte_vdpa.h
 create mode 100644 lib/librte_vhost/vdpa.c

-- 
2.7.5



[dpdk-dev] [PATCH 1/7] vhost: make capabilities configurable

2018-02-02 Thread Zhihong Wang
This patch makes vhost device capabilities configurable to adopt new
devices, since different devices may have different capabilities, like
different combinations of supported features, or different number of
queues. APIs are introduced to let app configure these capabilities.

Signed-off-by: Zhihong Wang 
---
 lib/librte_vhost/rte_vhost.h  | 50 
 lib/librte_vhost/socket.c | 77 +++
 lib/librte_vhost/vhost_user.c | 48 ---
 3 files changed, 164 insertions(+), 11 deletions(-)

diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index d332069..12cf48f 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -182,6 +182,56 @@ int rte_vhost_driver_unregister(const char *path);
 int rte_vhost_driver_set_features(const char *path, uint64_t features);
 
 /**
+ * Get the protocol feature bits.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param protocol_features
+ *  A pointer to store the queried protocol feature bits
+ * @return
+ *  0 on success, -1 on failure
+ */
+int rte_vhost_driver_get_protocol_features(const char *path,
+   uint64_t *protocol_features);
+
+/**
+ * Set the protocol feature bits the vhost-user driver supports.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param protocol_features
+ *  Supported protocol features
+ * @return
+ *  0 on success, -1 on failure
+ */
+int rte_vhost_driver_set_protocol_features(const char *path,
+   uint64_t protocol_features);
+
+/**
+ * Get the queue number.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param queue_num
+ *  A pointer to store the queried queue number
+ * @return
+ *  0 on success, -1 on failure
+ */
+int rte_vhost_driver_get_queue_num(const char *path, uint16_t *queue_num);
+
+/**
+ * Set the queue number the vhost-user driver supports.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param queue_num
+ *  Supported queue number
+ * @return
+ *  0 on success, -1 on failure
+ */
+int rte_vhost_driver_set_queue_num(const char *path, uint16_t queue_num);
+
+/**
  * Enable vhost-user driver features.
  *
  * Note that
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 6e3857e..e1d0036 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -49,7 +49,10 @@ struct vhost_user_socket {
 * features negotiation.
 */
uint64_t supported_features;
+   uint64_t supported_protocol_features;
uint64_t features;
+   uint64_t protocol_features;
+   uint16_t queue_num;
 
struct vhost_device_ops const *notify_ops;
 };
@@ -593,6 +596,75 @@ rte_vhost_driver_get_features(const char *path, uint64_t 
*features)
}
 }
 
+int rte_vhost_driver_set_protocol_features(const char *path,
+   uint64_t protocol_features)
+{
+   struct vhost_user_socket *vsocket;
+
+   pthread_mutex_lock(&vhost_user.mutex);
+   vsocket = find_vhost_user_socket(path);
+   if (vsocket) {
+   vsocket->supported_protocol_features = protocol_features;
+   vsocket->protocol_features = protocol_features;
+   }
+   pthread_mutex_unlock(&vhost_user.mutex);
+
+   return vsocket ? 0 : -1;
+}
+
+int
+rte_vhost_driver_get_protocol_features(const char *path,
+   uint64_t *protocol_features)
+{
+   struct vhost_user_socket *vsocket;
+
+   pthread_mutex_lock(&vhost_user.mutex);
+   vsocket = find_vhost_user_socket(path);
+   if (vsocket)
+   *protocol_features = vsocket->protocol_features;
+   pthread_mutex_unlock(&vhost_user.mutex);
+
+   if (!vsocket) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "socket file %s is not registered yet.\n", path);
+   return -1;
+   } else {
+   return 0;
+   }
+}
+
+int rte_vhost_driver_set_queue_num(const char *path, uint16_t queue_num)
+{
+   struct vhost_user_socket *vsocket;
+
+   pthread_mutex_lock(&vhost_user.mutex);
+   vsocket = find_vhost_user_socket(path);
+   if (vsocket)
+   vsocket->queue_num = queue_num;
+   pthread_mutex_unlock(&vhost_user.mutex);
+
+   return vsocket ? 0 : -1;
+}
+
+int rte_vhost_driver_get_queue_num(const char *path, uint16_t *queue_num)
+{
+   struct vhost_user_socket *vsocket;
+
+   pthread_mutex_lock(&vhost_user.mutex);
+   vsocket = find_vhost_user_socket(path);
+   if (vsocket)
+   *queue_num = vsocket->queue_num;
+   pthread_mutex_unlock(&vhost_user.mutex);
+
+   if (!vsocket) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "socket file %s is not registered yet.\n", path);
+   return -1;
+   } else {
+   return 0;
+   }
+}
+
 /*
  * Register a new vhost-user socket; here we could act as server
  * (the default case), or client (when RTE_VHO

[dpdk-dev] [PATCH 3/7] vhost: support selective datapath

2018-02-02 Thread Zhihong Wang
This patch introduces support for selective datapath in DPDK vhost-user lib
to enable various types of virtio-compatible devices to do data transfer
with virtio driver directly to enable acceleration. The default datapath is
the existing software implementation, more options will be available when
new engines are registered.

An engine is a group of virtio-compatible devices under a single address.
The engine driver includes:

 1. A set of engine ops is defined in rte_vdpa_eng_ops to perform engine
init, uninit, and attributes reporting.

 2. A set of device ops is defined in rte_vdpa_dev_ops for virtio devices
in the engine to do device specific operations:

 a. dev_conf: Called to configure the actual device when the virtio
device becomes ready.

 b. dev_close: Called to close the actual device when the virtio device
is stopped.

 c. vring_state_set: Called to change the state of the vring in the
actual device when vring state changes.

 d. feature_set: Called to set the negotiated features to device.

 e. migration_done: Called to allow the device to respond to RARP
sending.

Signed-off-by: Zhihong Wang 
---
 lib/librte_vhost/Makefile   |   4 +-
 lib/librte_vhost/rte_vdpa.h | 113 +++
 lib/librte_vhost/vdpa.c | 125 
 3 files changed, 240 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_vhost/rte_vdpa.h
 create mode 100644 lib/librte_vhost/vdpa.c

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 5d6c6ab..37044ac 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -22,9 +22,9 @@ LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev 
-lrte_net
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
-   vhost_user.c virtio_net.c
+   vhost_user.c virtio_net.c vdpa.c
 
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
new file mode 100644
index 000..729849b
--- /dev/null
+++ b/lib/librte_vhost/rte_vdpa.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_VDPA_H_
+#define _RTE_VDPA_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include 
+#include 
+#include "rte_vhost.h"
+
+#define MAX_VDPA_ENGINE_NUM 128
+#define MAX_VDPA_NAME_LEN 128
+
+
+struct rte_vdpa_eng_addr {
+   union {
+   uint8_t __dummy[64];
+
+   struct {
+   struct rte_pci_addr pci_addr;
+   };
+   };
+};
+
+struct rte_vdpa_eng_info {
+   char name[MAX_VDPA_NAME_LEN];
+   struct rte_vdpa_eng_addr *addr;
+};
+
+struct rte_vdpa_eng_attr {
+   uint64_t features;
+   uint64_t protocol_features;
+   uint32_t queue_num;
+   uint32_t dev_num;
+};
+
+/* register/remove engine */
+typedef int (*vdpa_eng_init_t)(int eid, struct rte_vdpa_eng_addr *addr);
+typedef int (*vdpa_eng_uninit_t)(int eid);
+
+/* query info of this engine */
+typedef int (*vdpa_info_query_t)(int eid,
+   struct rte_vdpa_eng_attr *attr);
+
+/* driver configure/close the port based on connection */
+typedef int (*vdpa_dev_conf_t)(int vid);
+typedef int (*vdpa_dev_close_t)(int vid);
+
+/* enable/disable this vring */
+typedef int (*vdpa_vring_state_set_t)(int vid, int vring, int state);
+
+/* set features when changed */
+typedef int (*vdpa_feature_set_t)(int vid);
+
+/* destination operations when migration done, e.g. send rarp */
+typedef int (*vdpa_migration_done_t)(int vid);
+
+/* device ops */
+struct rte_vdpa_dev_ops {
+   vdpa_dev_conf_tdev_conf;
+   vdpa_dev_close_t   dev_close;
+   vdpa_vring_state_set_t vring_state_set;
+   vdpa_feature_set_t feature_set;
+   vdpa_migration_done_t  migration_done;
+};
+
+/* engine ops */
+struct rte_vdpa_eng_ops {
+   vdpa_eng_init_t eng_init;
+   vdpa_eng_uninit_t eng_uninit;
+   vdpa_info_query_t info_query;
+};
+
+struct rte_vdpa_eng_driver {
+   const char *name;
+   struct rte_vdpa_eng_ops eng_ops;
+   struct rte_vdpa_dev_ops dev_ops;
+} __rte_cache_aligned;
+
+struct rte_vdpa_engine {
+   struct rte_vdpa_eng_info eng_info;
+   struct rte_vdpa_eng_driver *eng_drv;
+} __rte_cache_aligned;
+
+extern struct rte_vdpa_engine *vdpa_engines[];
+extern uint32_t vdpa_engine_num;
+
+/* engine management */
+int rte_vdpa_register_engine(const char *name, struct rte_vdpa_eng_addr *addr);
+int rte_vdpa_unregister_engine(int eid);
+
+int rte_vdpa_find_engine_id(struct rte_vdpa_eng_addr *addr);
+
+int rte_vdpa_info_query(int eid, struct rte_vdpa_eng_attr *attr);
+
+/* d

[dpdk-dev] [PATCH 2/7] vhost: export vhost feature definitions

2018-02-02 Thread Zhihong Wang
This patch exports vhost-user protocol features to support device driver
development.

Signed-off-by: Zhihong Wang 
---
 lib/librte_vhost/rte_vhost.h  |  8 
 lib/librte_vhost/vhost.h  |  4 +---
 lib/librte_vhost/vhost_user.c |  9 +
 lib/librte_vhost/vhost_user.h | 20 +++-
 4 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 12cf48f..6c92580 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -29,6 +29,14 @@ extern "C" {
 #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY   (1ULL << 2)
 #define RTE_VHOST_USER_IOMMU_SUPPORT   (1ULL << 3)
 
+#define RTE_VHOST_USER_PROTOCOL_F_MQ   0
+#define RTE_VHOST_USER_PROTOCOL_F_LOG_SHMFD1
+#define RTE_VHOST_USER_PROTOCOL_F_RARP 2
+#define RTE_VHOST_USER_PROTOCOL_F_REPLY_ACK3
+#define RTE_VHOST_USER_PROTOCOL_F_NET_MTU  4
+#define RTE_VHOST_USER_PROTOCOL_F_SLAVE_REQ5
+#define RTE_VHOST_USER_F_PROTOCOL_FEATURES 30
+
 /**
  * Information relating to memory regions including offsets to
  * addresses in QEMUs memory file.
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 646aad3..09a745d 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -172,8 +172,6 @@ struct vhost_msg {
  #define VIRTIO_F_VERSION_1 32
 #endif
 
-#define VHOST_USER_F_PROTOCOL_FEATURES 30
-
 /* Features supported by this builtin vhost-user net driver. */
 #define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
@@ -183,7 +181,7 @@ struct vhost_msg {
(1ULL << VIRTIO_NET_F_MQ)  | \
(1ULL << VIRTIO_F_VERSION_1)   | \
(1ULL << VHOST_F_LOG_ALL)  | \
-   (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+   (1ULL << RTE_VHOST_USER_F_PROTOCOL_FEATURES) | \
(1ULL << VIRTIO_NET_F_GSO) | \
(1ULL << VIRTIO_NET_F_HOST_TSO4) | \
(1ULL << VIRTIO_NET_F_HOST_TSO6) | \
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 87ba267..b1762e6 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -510,7 +510,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, 
VhostUserMsg *msg)
vring_invalidate(dev, vq);
 
if (vq->enabled && (dev->features &
-   (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
+   (1ULL << RTE_VHOST_USER_F_PROTOCOL_FEATURES))) {
dev = translate_ring_addresses(dev, msg->payload.state.index);
if (!dev)
return -1;
@@ -847,11 +847,11 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, 
struct VhostUserMsg *pmsg)
vq = dev->virtqueue[file.index];
 
/*
-* When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated,
+* When RTE_VHOST_USER_F_PROTOCOL_FEATURES is not negotiated,
 * the ring starts already enabled. Otherwise, it is enabled via
 * the SET_VRING_ENABLE message.
 */
-   if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
+   if (!(dev->features & (1ULL << RTE_VHOST_USER_F_PROTOCOL_FEATURES)))
vq->enabled = 1;
 
if (vq->kickfd >= 0)
@@ -961,7 +961,8 @@ vhost_user_get_protocol_features(struct virtio_net *dev)
 * Qemu versions (from v2.7.0 to v2.9.0).
 */
if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
-   protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK);
+   protocol_features &=
+   ~(1ULL << RTE_VHOST_USER_PROTOCOL_F_REPLY_ACK);
 
return protocol_features;
 }
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index d4bd604..58e475d 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -14,19 +14,13 @@
 
 #define VHOST_MEMORY_MAX_NREGIONS 8
 
-#define VHOST_USER_PROTOCOL_F_MQ   0
-#define VHOST_USER_PROTOCOL_F_LOG_SHMFD1
-#define VHOST_USER_PROTOCOL_F_RARP 2
-#define VHOST_USER_PROTOCOL_F_REPLY_ACK3
-#define VHOST_USER_PROTOCOL_F_NET_MTU 4
-#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
-
-#define VHOST_USER_PROTOCOL_FEATURES   ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
-(1ULL << 
VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
-(1ULL << VHOST_USER_PROTOCOL_F_RARP) | 
\
-(1ULL << 
VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
-(1ULL << 
VHOST_USER_PROTOCOL_F_NET_MTU) | \
-(1ULL << 
VHOST_USER_PROTOCOL_F_SLAVE_REQ))
+#define VHOST_USER_PROTOCOL_FEATURES \
+   

[dpdk-dev] [PATCH 4/7] vhost: add apis for datapath configuration

2018-02-02 Thread Zhihong Wang
This patch adds APIs for datapath configuration. The eid and did of the
vhost-user socket can be configured to identify the actual device.

When the default software datapath is used, eid and did are set to -1.
When alternative datapath is used, eid and did are set by app to specify
which device to use. Each vhost-user socket can have only 1 connection in
this case.

Signed-off-by: Zhihong Wang 
---
 lib/librte_vhost/rte_vhost.h | 68 
 lib/librte_vhost/socket.c| 65 ++
 lib/librte_vhost/vhost.c | 44 
 lib/librte_vhost/vhost.h | 10 +++
 4 files changed, 187 insertions(+)

diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 6c92580..03f4ed1 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -178,6 +178,50 @@ int rte_vhost_driver_register(const char *path, uint64_t 
flags);
 int rte_vhost_driver_unregister(const char *path);
 
 /**
+ * Set the engine id, enforce single connection per socket
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param eid
+ *  Engine id
+ * @return
+ *  0 on success, -1 on failure
+ */
+int rte_vhost_driver_set_vdpa_eid(const char *path, int eid);
+
+/**
+ * Set the device id, enforce single connection per socket
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param did
+ *  Device id
+ * @return
+ *  0 on success, -1 on failure
+ */
+int rte_vhost_driver_set_vdpa_did(const char *path, int did);
+
+/**
+ * Get the engine id
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @return
+ *  Engine id, -1 on failure
+ */
+int rte_vhost_driver_get_vdpa_eid(const char *path);
+
+/**
+ * Get the device id
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @return
+ *  Device id, -1 on failure
+ */
+int rte_vhost_driver_get_vdpa_did(const char *path);
+
+/**
  * Set the feature bits the vhost-user driver supports.
  *
  * @param path
@@ -492,6 +536,30 @@ int rte_vhost_vring_call(int vid, uint16_t vring_idx);
  */
 uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
 
+/**
+ * Get vdpa engine id for vhost device.
+ *
+ * @param vid
+ *  vhost device ID
+ * @param eid
+ *  engine id
+ * @return
+ *  engine id
+ */
+int rte_vhost_get_vdpa_eid(int vid);
+
+/**
+ * Get vdpa device id for vhost device.
+ *
+ * @param vid
+ *  vhost device ID
+ * @param did
+ *  device id
+ * @return
+ *  device id
+ */
+int rte_vhost_get_vdpa_did(int vid);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index e1d0036..c4f90af 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -54,6 +54,13 @@ struct vhost_user_socket {
uint64_t protocol_features;
uint16_t queue_num;
 
+   /* engine and device id to identify a certain port on a specific
+* backend, both are set to -1 for sw. when used, one socket can
+* have 1 connection only.
+*/
+   int eid;
+   int did;
+
struct vhost_device_ops const *notify_ops;
 };
 
@@ -524,6 +531,64 @@ find_vhost_user_socket(const char *path)
 }
 
 int
+rte_vhost_driver_set_vdpa_eid(const char *path, int eid)
+{
+   struct vhost_user_socket *vsocket;
+
+   pthread_mutex_lock(&vhost_user.mutex);
+   vsocket = find_vhost_user_socket(path);
+   if (vsocket)
+   vsocket->eid = eid;
+   pthread_mutex_unlock(&vhost_user.mutex);
+
+   return vsocket ? 0 : -1;
+}
+
+int
+rte_vhost_driver_set_vdpa_did(const char *path, int did)
+{
+   struct vhost_user_socket *vsocket;
+
+   pthread_mutex_lock(&vhost_user.mutex);
+   vsocket = find_vhost_user_socket(path);
+   if (vsocket)
+   vsocket->did = did;
+   pthread_mutex_unlock(&vhost_user.mutex);
+
+   return vsocket ? 0 : -1;
+}
+
+int
+rte_vhost_driver_get_vdpa_eid(const char *path)
+{
+   struct vhost_user_socket *vsocket;
+   int eid = -1;
+
+   pthread_mutex_lock(&vhost_user.mutex);
+   vsocket = find_vhost_user_socket(path);
+   if (vsocket)
+   eid = vsocket->eid;
+   pthread_mutex_unlock(&vhost_user.mutex);
+
+   return eid;
+}
+
+int
+rte_vhost_driver_get_vdpa_did(const char *path)
+{
+   struct vhost_user_socket *vsocket;
+   int did = -1;
+
+   pthread_mutex_lock(&vhost_user.mutex);
+   vsocket = find_vhost_user_socket(path);
+   if (vsocket)
+   did = vsocket->did;
+   pthread_mutex_unlock(&vhost_user.mutex);
+
+   return did;
+}
+
+int
 rte_vhost_driver_disable_features(const char *path, uint64_t features)
 {
struct vhost_user_socket *vsocket;
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 1dd9adb..2dff199 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -288,6 +288,8 @@ vhost_new_device(void)
vhost_devices[i] = dev;
dev->vid = i;
dev->slave_req_fd = -1

[dpdk-dev] [PATCH 6/7] vhost: get callfd before device setup

2018-02-02 Thread Zhihong Wang
From: Xiao Wang 

This patch makes sure the device is configured with all needed guest
info. According to QEMU vhost message sequence, the real callfd comes
just before SET_VRING_ENABLE.

Signed-off-by: Xiao Wang 
---
 lib/librte_vhost/vhost_user.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 05b53fa..3fe1b3d 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1547,7 +1547,8 @@ vhost_user_msg_handler(int vid, int fd)
send_vhost_reply(fd, &msg);
}
 
-   if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) {
+   if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)
+   && msg.request.master == VHOST_USER_SET_VRING_ENABLE) {
dev->flags |= VIRTIO_DEV_READY;
 
if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
-- 
2.7.5



[dpdk-dev] [PATCH 7/7] vhost: export new apis

2018-02-02 Thread Zhihong Wang
This patch exports new APIs as experimental.

Signed-off-by: Zhihong Wang 
---
 lib/librte_vhost/rte_vdpa.h| 16 +++-
 lib/librte_vhost/rte_vhost.h   | 30 --
 lib/librte_vhost/rte_vhost_version.map | 18 ++
 3 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
index 729849b..f6f6d0a 100644
--- a/lib/librte_vhost/rte_vdpa.h
+++ b/lib/librte_vhost/rte_vdpa.h
@@ -93,15 +93,21 @@ extern struct rte_vdpa_engine *vdpa_engines[];
 extern uint32_t vdpa_engine_num;
 
 /* engine management */
-int rte_vdpa_register_engine(const char *name, struct rte_vdpa_eng_addr *addr);
-int rte_vdpa_unregister_engine(int eid);
+int __rte_experimental
+rte_vdpa_register_engine(const char *name, struct rte_vdpa_eng_addr *addr);
 
-int rte_vdpa_find_engine_id(struct rte_vdpa_eng_addr *addr);
+int __rte_experimental
+rte_vdpa_unregister_engine(int eid);
 
-int rte_vdpa_info_query(int eid, struct rte_vdpa_eng_attr *attr);
+int __rte_experimental
+rte_vdpa_find_engine_id(struct rte_vdpa_eng_addr *addr);
+
+int __rte_experimental
+rte_vdpa_info_query(int eid, struct rte_vdpa_eng_attr *attr);
 
 /* driver register api */
-void rte_vdpa_register_driver(struct rte_vdpa_eng_driver *drv);
+void __rte_experimental
+rte_vdpa_register_driver(struct rte_vdpa_eng_driver *drv);
 
 #define RTE_VDPA_REGISTER_DRIVER(nm, drv) \
 RTE_INIT(vdpainitfn_ ##nm); \
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 03f4ed1..dc38566 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -187,7 +187,8 @@ int rte_vhost_driver_unregister(const char *path);
  * @return
  *  0 on success, -1 on failure
  */
-int rte_vhost_driver_set_vdpa_eid(const char *path, int eid);
+int __rte_experimental
+rte_vhost_driver_set_vdpa_eid(const char *path, int eid);
 
 /**
  * Set the device id, enforce single connection per socket
@@ -199,7 +200,8 @@ int rte_vhost_driver_set_vdpa_eid(const char *path, int 
eid);
  * @return
  *  0 on success, -1 on failure
  */
-int rte_vhost_driver_set_vdpa_did(const char *path, int did);
+int __rte_experimental
+rte_vhost_driver_set_vdpa_did(const char *path, int did);
 
 /**
  * Get the engine id
@@ -209,7 +211,8 @@ int rte_vhost_driver_set_vdpa_did(const char *path, int 
did);
  * @return
  *  Engine id, -1 on failure
  */
-int rte_vhost_driver_get_vdpa_eid(const char *path);
+int __rte_experimental
+rte_vhost_driver_get_vdpa_eid(const char *path);
 
 /**
  * Get the device id
@@ -219,7 +222,8 @@ int rte_vhost_driver_get_vdpa_eid(const char *path);
  * @return
  *  Device id, -1 on failure
  */
-int rte_vhost_driver_get_vdpa_did(const char *path);
+int __rte_experimental
+rte_vhost_driver_get_vdpa_did(const char *path);
 
 /**
  * Set the feature bits the vhost-user driver supports.
@@ -243,7 +247,8 @@ int rte_vhost_driver_set_features(const char *path, 
uint64_t features);
  * @return
  *  0 on success, -1 on failure
  */
-int rte_vhost_driver_get_protocol_features(const char *path,
+int __rte_experimental
+rte_vhost_driver_get_protocol_features(const char *path,
uint64_t *protocol_features);
 
 /**
@@ -256,7 +261,8 @@ int rte_vhost_driver_get_protocol_features(const char *path,
  * @return
  *  0 on success, -1 on failure
  */
-int rte_vhost_driver_set_protocol_features(const char *path,
+int __rte_experimental
+rte_vhost_driver_set_protocol_features(const char *path,
uint64_t protocol_features);
 
 /**
@@ -269,7 +275,8 @@ int rte_vhost_driver_set_protocol_features(const char *path,
  * @return
  *  0 on success, -1 on failure
  */
-int rte_vhost_driver_get_queue_num(const char *path, uint16_t *queue_num);
+int __rte_experimental
+rte_vhost_driver_get_queue_num(const char *path, uint16_t *queue_num);
 
 /**
  * Set the queue number the vhost-user driver supports.
@@ -281,7 +288,8 @@ int rte_vhost_driver_get_queue_num(const char *path, 
uint16_t *queue_num);
  * @return
  *  0 on success, -1 on failure
  */
-int rte_vhost_driver_set_queue_num(const char *path, uint16_t queue_num);
+int __rte_experimental
+rte_vhost_driver_set_queue_num(const char *path, uint16_t queue_num);
 
 /**
  * Enable vhost-user driver features.
@@ -546,7 +554,8 @@ uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
  * @return
  *  engine id
  */
-int rte_vhost_get_vdpa_eid(int vid);
+int __rte_experimental
+rte_vhost_get_vdpa_eid(int vid);
 
 /**
  * Get vdpa device id for vhost device.
@@ -558,7 +567,8 @@ int rte_vhost_get_vdpa_eid(int vid);
  * @return
  *  device id
  */
-int rte_vhost_get_vdpa_did(int vid);
+int __rte_experimental
+rte_vhost_get_vdpa_did(int vid);
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_vhost/rte_vhost_version.map 
b/lib/librte_vhost/rte_vhost_version.map
index df01031..de585df 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -59,3 +59,21 @@ DPDK_18.02 {
 

[dpdk-dev] [PATCH 5/7] vhost: adapt vhost lib for selective datapath

2018-02-02 Thread Zhihong Wang
This patch adapts vhost lib for selective datapath by calling device ops
at the corresponding stage.

Signed-off-by: Zhihong Wang 
---
 lib/librte_vhost/socket.c |  3 +++
 lib/librte_vhost/vhost.c  |  5 +
 lib/librte_vhost/vhost_user.c | 48 +++
 3 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index c4f90af..8296e4b 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -205,6 +205,9 @@ vhost_user_add_connection(int fd, struct vhost_user_socket 
*vsocket)
size = strnlen(vsocket->path, PATH_MAX);
vhost_set_ifname(vid, vsocket->path, size);
 
+   vhost_set_vdpa_eid(vid, vsocket->eid);
+   vhost_set_vdpa_did(vid, vsocket->did);
+
if (vsocket->dequeue_zero_copy)
vhost_enable_dequeue_zero_copy(vid);
 
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 2dff199..1a3ddd5 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -302,11 +302,16 @@ void
 vhost_destroy_device(int vid)
 {
struct virtio_net *dev = get_device(vid);
+   int eid = dev->eid;
 
if (dev == NULL)
return;
 
if (dev->flags & VIRTIO_DEV_RUNNING) {
+   if (eid >= 0 && vdpa_engines[eid] &&
+   vdpa_engines[eid]->eng_drv &&
+   vdpa_engines[eid]->eng_drv->dev_ops.dev_close)
+   vdpa_engines[eid]->eng_drv->dev_ops.dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_RUNNING;
dev->notify_ops->destroy_device(vid);
}
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index b1762e6..05b53fa 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -116,7 +116,13 @@ vhost_user_set_owner(void)
 static int
 vhost_user_reset_owner(struct virtio_net *dev)
 {
+   int eid = dev->eid;
+
if (dev->flags & VIRTIO_DEV_RUNNING) {
+   if (eid >= 0 && vdpa_engines[eid] &&
+   vdpa_engines[eid]->eng_drv &&
+   vdpa_engines[eid]->eng_drv->dev_ops.dev_close)
+   vdpa_engines[eid]->eng_drv->dev_ops.dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_RUNNING;
dev->notify_ops->destroy_device(dev->vid);
}
@@ -157,6 +163,7 @@ static int
 vhost_user_set_features(struct virtio_net *dev, uint64_t features)
 {
uint64_t vhost_features = 0;
+   int eid = dev->eid;
 
rte_vhost_driver_get_features(dev->ifname, &vhost_features);
if (features & ~vhost_features) {
@@ -186,6 +193,11 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t 
features)
dev->notify_ops->features_changed(dev->vid, features);
}
 
+   if (eid >= 0 && vdpa_engines[eid] &&
+   vdpa_engines[eid]->eng_drv &&
+   vdpa_engines[eid]->eng_drv->dev_ops.feature_set)
+   vdpa_engines[eid]->eng_drv->dev_ops.feature_set(dev->vid);
+
dev->features = features;
if (dev->features &
((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) 
{
@@ -883,9 +895,14 @@ vhost_user_get_vring_base(struct virtio_net *dev,
  VhostUserMsg *msg)
 {
struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
+   int eid = dev->eid;
 
/* We have to stop the queue (virtio) if it is running. */
if (dev->flags & VIRTIO_DEV_RUNNING) {
+   if (eid >= 0 && vdpa_engines[eid] &&
+   vdpa_engines[eid]->eng_drv &&
+   vdpa_engines[eid]->eng_drv->dev_ops.dev_close)
+   vdpa_engines[eid]->eng_drv->dev_ops.dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_RUNNING;
dev->notify_ops->destroy_device(dev->vid);
}
@@ -928,16 +945,24 @@ vhost_user_set_vring_enable(struct virtio_net *dev,
VhostUserMsg *msg)
 {
int enable = (int)msg->payload.state.num;
+   int index = (int)msg->payload.state.index;
+   int eid = dev->eid;
 
RTE_LOG(INFO, VHOST_CONFIG,
"set queue enable: %d to qp idx: %d\n",
-   enable, msg->payload.state.index);
+   enable, index);
+
+   if (eid >= 0 && vdpa_engines[eid] &&
+   vdpa_engines[eid]->eng_drv &&
+   vdpa_engines[eid]->eng_drv->dev_ops.vring_state_set)
+   vdpa_engines[eid]->eng_drv->dev_ops.vring_state_set(dev->vid,
+   index, enable);
 
if (dev->notify_ops->vring_state_changed)
dev->notify_ops->vring_state_changed(dev->vid,
-   msg->payload.state.index, enable);
+   index, enable);
 
- 

Re: [dpdk-dev] [RFC v3] Compression API in DPDK :SW ZLIB PMD

2018-02-02 Thread Jain, Deepak K
Hi Shally,

> -Original Message-
> From: Verma, Shally [mailto:shally.ve...@cavium.com]
> Sent: Friday, February 2, 2018 11:27 AM
> To: Trahe, Fiona ; dev@dpdk.org
> Cc: Gupta, Ashish ; Sahu, Sunila
> ; Challa, Mahipal ;
> Athreya, Narayana Prasad ; De Lara
> Guarch, Pablo ; Jain, Deepak K
> ; Roy Pledge ; Youri
> Querry ; Hemant Agrawal
> ; Ahmed Mansour
> ; De Lara Guarch, Pablo
> 
> Subject: RE: [RFC v3] Compression API in DPDK :SW ZLIB PMD
 >; Roy Pledge ; Youri
> >Querry ; Hemant Agrawal
> >; Ahmed Mansour
> ; De
> >Lara Guarch, Pablo 
> >Subject: RE: [RFC v3] Compression API in DPDK :SW ZLIB PMD
> >
> >Hi Fiona
> >
> >> I think a common draft repo would be the best way forward. Let's talk to
> the maintainers about getting one set up.
> >Sure. Please share us details when it is available. I assume it will be 
> >created
> once API spec move from RFC to 1st version?
> >
> [Shally] Any update on this? We're available with sample SW ZLIB based
> implementation (currently proof-concepted for stateless only) . So, if any
> repo is available, we can upload it there.

We are working towards getting final internal approvals and will have more 
info. on this early next week.

> 
> >Thanks
> >Shally
> >
> >From: Trahe, Fiona [mailto:fiona.tr...@intel.com]
> >Sent: 12 January 2018 00:31
> >To: Verma, Shally ; dev@dpdk.org
> >Cc: Gupta, Ashish ; Sahu, Sunila
> >; Challa, Mahipal
> ;
> >Athreya, Narayana Prasad ; De
> Lara
> >Guarch, Pablo ; Jain, Deepak K
> >; Roy Pledge ; Youri
> >Querry ; Hemant Agrawal
> >; Ahmed Mansour
> ; Trahe,
> >Fiona ; De Lara Guarch, Pablo
> >
> >Subject: RE: [RFC v3] Compression API in DPDK :SW ZLIB PMD
> >
> >Hi Shally,
> >
> >
> >From: Verma, Shally [mailto:shally.ve...@cavium.com]
> >Sent: Wednesday, January 10, 2018 8:33 AM
> >To: Trahe, Fiona ; mailto:dev@dpdk.org
> >Cc: Gupta, Ashish ; Sahu, Sunila
> >; Challa, Mahipal
> >; Athreya, Narayana Prasad
> >; De Lara Guarch, Pablo
> >; Jain, Deepak K
> >; Roy Pledge
> >; Youri Querry
> >; Hemant Agrawal
> >; Ahmed Mansour
> >
> >Subject: [RFC v3] Compression API in DPDK :SW ZLIB PMD
> >
> >Hi Fiona
> >
> >We are planning to implement ZLIB based SW PMD to proof-concept DPDK
> >compression RFC v3 API spec internally. However, would like to check If
> >you're working upon similar in parallel and if yes, then what's your
> >development roadmap / strategy so that we could see if we could leverage
> joint effort.
> >Depending upon your feedback, we can see if we can have some common
> repo for joint development or send it as RFC patch.
> >
> >Let me know your opinion on same.
> >[Fiona] We have not started a zlib based SW PMD, and would be delighted
> >if you would do this, I agree it's a good way to prove out the API.
> >We are writing some unit tests against the API, we would be happy to
> >share these with you, and continue to develop these jointly so the same
> test sets can be targeted against all PMDs.
> >I think a common draft repo would be the best way forward. Let's talk to
> the maintainers about getting one set up.
> >
> >Thanks
> >Shally



[dpdk-dev] [PATCH] pmdinfogen: fix resource leak of FILE object

2018-02-02 Thread Bruce Richardson
Coverity flags an issue where the resources used by the FILE object for
the temporary input file are leaked. This is a very minor issue, but is
easily fixed, while also avoiding later problems where we try to close
an invalid file descriptor in the failure case.

The fix is to use "dup()" to get a new file descriptor number rather than
using the value directly from fileno. This allows us to close the file
opened with tmpfile() within its scope block, while allowing the duplicate
to pass to the outer block and be closed when the function terminates.

As a side-effect I/O in the function is therefore changed from using stdio
fread/fwrite to read/write system calls.

Coverity issue: 260399
Fixes: 0d68533617e3 ("pmdinfogen: allow using stdin and stdout")

Signed-off-by: Bruce Richardson 
---
 buildtools/pmdinfogen/pmdinfogen.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/buildtools/pmdinfogen/pmdinfogen.c 
b/buildtools/pmdinfogen/pmdinfogen.c
index 45b267346..0f35ca46b 100644
--- a/buildtools/pmdinfogen/pmdinfogen.c
+++ b/buildtools/pmdinfogen/pmdinfogen.c
@@ -50,20 +50,24 @@ static void *grab_file(const char *filename, unsigned long 
*size)
/* from stdin, use a temporary file to mmap */
FILE *infile;
char buffer[1024];
-   size_t n;
+   int n;
 
infile = tmpfile();
if (infile == NULL) {
perror("tmpfile");
return NULL;
}
-   while (!feof(stdin)) {
-   n = fread(buffer, 1, sizeof(buffer), stdin);
-   if (fwrite(buffer, 1, n, infile) != n)
+   fd = dup(fileno(infile));
+   fclose(infile);
+   if (fd < 0)
+   return NULL;
+
+   n = read(STDIN_FILENO, buffer, sizeof(buffer));
+   while (n > 0) {
+   if (write(fd, buffer, n) != n)
goto failed;
+   n = read(STDIN_FILENO, buffer, sizeof(buffer));
}
-   fflush(infile);
-   fd = fileno(infile);
}
 
if (fstat(fd, &st))
-- 
2.14.3



[dpdk-dev] [PATCH v4 1/4] net/i40e: add warnings when writing global registers

2018-02-02 Thread Beilei Xing
Add warnings when writing global registers.

Signed-off-by: Beilei Xing 
---
 doc/guides/nics/i40e.rst   | 12 
 drivers/net/i40e/i40e_ethdev.c | 25 
 drivers/net/i40e/i40e_ethdev.h | 43 ++
 drivers/net/i40e/i40e_fdir.c   |  1 +
 drivers/net/i40e/i40e_flow.c   |  1 +
 5 files changed, 82 insertions(+)

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 29601f1..166f447 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -566,6 +566,18 @@ DCB function
 
 DCB works only when RSS is enabled.
 
+Global configuration warning
+
+
+I40E PMD will set some global registers to enable some function or set some
+configure. Then when using different ports of the same NIC with Linux kernel
+and DPDK, the port with Linux kernel will be impacted by the port with DPDK.
+For example, register I40E_GL_SWT_L2TAGCTRL is used to control L2 tag, i40e
+PMD uses I40E_GL_SWT_L2TAGCTRL to set vlan TPID. If setting TPID in port A
+with DPDK, then the configuration will also impact port B in the NIC with
+kernel driver, which don't want to use the TPID.
+So PMD reports warning to clarify what is changed by writing global register.
+
 High Performance of Small Packets on 40G NIC
 
 
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 277c1a8..b4a2857 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -680,6 +680,7 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
 */
I40E_WRITE_REG(hw, I40E_GLQF_ORT(40), 0x0029);
I40E_WRITE_REG(hw, I40E_GLQF_PIT(9), 0x9420);
+   i40e_global_cfg_warning(I40E_WARNING_QINQ_PARSER);
 }
 
 #define I40E_FLOW_CONTROL_ETHERTYPE  0x8808
@@ -1133,6 +1134,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
   0x0028,  NULL);
if (ret)
PMD_INIT_LOG(ERR, "Failed to write L3 MAP register %d", ret);
+   i40e_global_cfg_warning(I40E_WARNING_QINQ_CLOUD_FILTER);
 
/* Need the special FW version to support floating VEB */
config_floating_veb(dev);
@@ -1413,6 +1415,7 @@ void i40e_flex_payload_reg_set_default(struct i40e_hw *hw)
I40E_WRITE_REG(hw, I40E_GLQF_ORT(33), 0x);
I40E_WRITE_REG(hw, I40E_GLQF_ORT(34), 0x);
I40E_WRITE_REG(hw, I40E_GLQF_ORT(35), 0x);
+   i40e_global_cfg_warning(I40E_WARNING_DIS_FLX_PLD);
 }
 
 static int
@@ -3260,6 +3263,7 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev,
/* If NVM API < 1.7, keep the register setting */
ret = i40e_vlan_tpid_set_by_registers(dev, vlan_type,
  tpid, qinq);
+   i40e_global_cfg_warning(I40E_WARNING_TPID);
 
return ret;
 }
@@ -3502,6 +3506,7 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
I40E_WRITE_REG(hw, I40E_GLRPB_GLW,
   pf->fc_conf.low_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT);
+   i40e_global_cfg_warning(I40E_WARNING_FLOW_CTL);
 
I40E_WRITE_FLUSH(hw);
 
@@ -7284,6 +7289,8 @@ i40e_status_code i40e_replace_mpls_l1_filter(struct 
i40e_pf *pf)
 
status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
   &filter_replace_buf);
+   if (!status)
+   i40e_global_cfg_warning(I40E_WARNING_RPL_CLD_FILTER);
return status;
 }
 
@@ -7338,6 +7345,8 @@ i40e_status_code i40e_replace_mpls_cloud_filter(struct 
i40e_pf *pf)
 
status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
   &filter_replace_buf);
+   if (!status)
+   i40e_global_cfg_warning(I40E_WARNING_RPL_CLD_FILTER);
return status;
 }
 
@@ -7405,6 +7414,8 @@ i40e_replace_gtp_l1_filter(struct i40e_pf *pf)
 
status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
   &filter_replace_buf);
+   if (!status)
+   i40e_global_cfg_warning(I40E_WARNING_RPL_CLD_FILTER);
return status;
 }
 
@@ -7457,6 +7468,8 @@ i40e_status_code i40e_replace_gtp_cloud_filter(struct 
i40e_pf *pf)
 
status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
   &filter_replace_buf);
+   if (!status)
+   i40e_global_cfg_warning(I40E_WARNING_RPL_CLD_FILTER);
return status;
 }
 
@@ -8006,6 +8019,7 @@ i40e_dev_set_gre_key_len(struct i40e_hw *hw, uint8_t len)
   reg, NULL);
if (ret != 0)
return ret;
+   i40e_global_cfg_warning(I40E_WARNING_GRE_KEY_LEN);
} else {
ret = 0;
}
@@ -8265,6 +8279,7 @@ i40e_set_has

[dpdk-dev] [PATCH v4 0/4] net/i40e: fix multiple driver support issue

2018-02-02 Thread Beilei Xing
The DPDK i40e PMD modifies some global registers during and after
initialization, which has an impact when a 700 series Ethernet Adapter is
used with both the Linux kernel driver and the DPDK PMD.
This patchset adds logging for global configuration, and adds device args
to disable global configuration and to change the interrupt used for the PF.

v4 changes:
 - Fix interrupt conflict when using multiple driver.

v3 changes:
 - Reword commit log.

v2 changes:
 - Add debug log when writing global registers
  - Add option to disable writing global registers
  

Beilei Xing (4):
  net/i40e: add warnings when writing global registers
  net/i40e: add debug logs when writing global registers
  net/i40e: fix multiple driver support issue
  net/i40e: fix interrupt conflict when using multi-driver

 doc/guides/nics/i40e.rst  |  12 +
 drivers/net/i40e/i40e_ethdev.c| 477 ++
 drivers/net/i40e/i40e_ethdev.h|  69 +-
 drivers/net/i40e/i40e_ethdev_vf.c |   4 +-
 drivers/net/i40e/i40e_fdir.c  |  40 ++--
 drivers/net/i40e/i40e_flow.c  |   9 +
 6 files changed, 487 insertions(+), 124 deletions(-)

-- 
2.5.5



[dpdk-dev] [PATCH v4 2/4] net/i40e: add debug logs when writing global registers

2018-02-02 Thread Beilei Xing
Add debug logs when writing global registers.

Signed-off-by: Beilei Xing 
---
 drivers/net/i40e/i40e_ethdev.c | 153 ++---
 drivers/net/i40e/i40e_ethdev.h |  11 +++
 2 files changed, 123 insertions(+), 41 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index b4a2857..aad00aa 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -656,6 +656,15 @@ rte_i40e_dev_atomic_write_link_status(struct rte_eth_dev 
*dev,
return 0;
 }
 
+static inline void
+i40e_write_global_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val)
+{
+   i40e_write_rx_ctl(hw, reg_addr, reg_val);
+   PMD_DRV_LOG(DEBUG, "Global register 0x%08x is modified "
+   "with value 0x%08x",
+   reg_addr, reg_val);
+}
+
 RTE_PMD_REGISTER_PCI(net_i40e, rte_i40e_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_i40e, pci_id_i40e_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_i40e, "* igb_uio | uio_pci_generic | vfio-pci");
@@ -678,8 +687,8 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
 * configuration API is added to avoid configuration conflicts
 * between ports of the same device.
 */
-   I40E_WRITE_REG(hw, I40E_GLQF_ORT(40), 0x0029);
-   I40E_WRITE_REG(hw, I40E_GLQF_PIT(9), 0x9420);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_ORT(40), 0x0029);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_PIT(9), 0x9420);
i40e_global_cfg_warning(I40E_WARNING_QINQ_PARSER);
 }
 
@@ -1134,6 +1143,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
   0x0028,  NULL);
if (ret)
PMD_INIT_LOG(ERR, "Failed to write L3 MAP register %d", ret);
+   PMD_INIT_LOG(DEBUG, "Global register 0x%08x is changed with value 0x28",
+I40E_GLQF_L3_MAP(40));
i40e_global_cfg_warning(I40E_WARNING_QINQ_CLOUD_FILTER);
 
/* Need the special FW version to support floating VEB */
@@ -1412,9 +1423,9 @@ void i40e_flex_payload_reg_set_default(struct i40e_hw *hw)
 * Disable by default flexible payload
 * for corresponding L2/L3/L4 layers.
 */
-   I40E_WRITE_REG(hw, I40E_GLQF_ORT(33), 0x);
-   I40E_WRITE_REG(hw, I40E_GLQF_ORT(34), 0x);
-   I40E_WRITE_REG(hw, I40E_GLQF_ORT(35), 0x);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_ORT(33), 0x);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_ORT(34), 0x);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_ORT(35), 0x);
i40e_global_cfg_warning(I40E_WARNING_DIS_FLX_PLD);
 }
 
@@ -3219,8 +3230,8 @@ i40e_vlan_tpid_set_by_registers(struct rte_eth_dev *dev,
return -EIO;
}
PMD_DRV_LOG(DEBUG,
-   "Debug write 0x%08"PRIx64" to I40E_GL_SWT_L2TAGCTRL[%d]",
-   reg_w, reg_id);
+   "Global register 0x%08x is changed with value 0x%08x",
+   I40E_GL_SWT_L2TAGCTRL(reg_id), (uint32_t)reg_w);
 
return 0;
 }
@@ -3494,16 +3505,16 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
}
 
/* config the water marker both based on the packets and bytes */
-   I40E_WRITE_REG(hw, I40E_GLRPB_PHW,
+   I40E_WRITE_GLB_REG(hw, I40E_GLRPB_PHW,
   (pf->fc_conf.high_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT) / I40E_PACKET_AVERAGE_SIZE);
-   I40E_WRITE_REG(hw, I40E_GLRPB_PLW,
+   I40E_WRITE_GLB_REG(hw, I40E_GLRPB_PLW,
   (pf->fc_conf.low_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT) / I40E_PACKET_AVERAGE_SIZE);
-   I40E_WRITE_REG(hw, I40E_GLRPB_GHW,
+   I40E_WRITE_GLB_REG(hw, I40E_GLRPB_GHW,
   pf->fc_conf.high_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT);
-   I40E_WRITE_REG(hw, I40E_GLRPB_GLW,
+   I40E_WRITE_GLB_REG(hw, I40E_GLRPB_GLW,
   pf->fc_conf.low_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT);
i40e_global_cfg_warning(I40E_WARNING_FLOW_CTL);
@@ -7289,8 +7300,13 @@ i40e_status_code i40e_replace_mpls_l1_filter(struct 
i40e_pf *pf)
 
status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
   &filter_replace_buf);
-   if (!status)
+   if (!status) {
i40e_global_cfg_warning(I40E_WARNING_RPL_CLD_FILTER);
+   PMD_DRV_LOG(DEBUG, "Global configuration modification: "
+   "cloud l1 type is changed from 0x%x to 0x%x",
+   filter_replace.old_filter_type,
+   filter_replace.new_filter_type);
+   }
return status;
 }
 
@@ -7323,6 +7339,10 @@ i40e_status_code i40e_replace_mpls_cloud_filter(struct 
i40e_pf *pf)
   &filter_replace_buf);
if (status < 0)
   

[dpdk-dev] [PATCH v4 3/4] net/i40e: fix multiple driver support issue

2018-02-02 Thread Beilei Xing
This patch provides the option to disable writing some global registers
in the PMD, in order to avoid affecting other drivers when multiple
drivers run on the same NIC and control different physical ports. This
is needed because a few global resources are shared among the different
physical ports.

Fixes: ec246eeb5da1 ("i40e: use default filter input set on init")
Fixes: 98f055707685 ("i40e: configure input fields for RSS or flow director")
Fixes: f05ec7d77e41 ("i40e: initialize flow director flexible payload setting")
Fixes: e536c2e32883 ("net/i40e: fix parsing QinQ packets type")
Fixes: 19b16e2f6442 ("ethdev: add vlan type when setting ether type")

Signed-off-by: Beilei Xing 
---
 drivers/net/i40e/i40e_ethdev.c | 262 -
 drivers/net/i40e/i40e_ethdev.h |   1 +
 drivers/net/i40e/i40e_fdir.c   |  39 +++---
 drivers/net/i40e/i40e_flow.c   |   8 ++
 4 files changed, 240 insertions(+), 70 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index aad00aa..bede5c5 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -1039,6 +1039,64 @@ i40e_init_queue_region_conf(struct rte_eth_dev *dev)
memset(info, 0, sizeof(struct i40e_queue_regions));
 }
 
+#define ETH_I40E_SUPPORT_MULTI_DRIVER  "support-multi-driver"
+
+static int
+i40e_parse_multi_drv_handler(__rte_unused const char *key,
+  const char *value,
+  void *opaque)
+{
+   struct i40e_pf *pf;
+   unsigned long support_multi_driver;
+   char *end;
+
+   pf = (struct i40e_pf *)opaque;
+
+   errno = 0;
+   support_multi_driver = strtoul(value, &end, 10);
+   if (errno != 0 || end == value || *end != 0) {
+   PMD_DRV_LOG(WARNING, "Wrong global configuration");
+   return -(EINVAL);
+   }
+
+   if (support_multi_driver == 1 || support_multi_driver == 0)
+   pf->support_multi_driver = (bool)support_multi_driver;
+   else
+   PMD_DRV_LOG(WARNING, "%s must be 1 or 0,",
+   "enable global configuration by default."
+   ETH_I40E_SUPPORT_MULTI_DRIVER);
+   return 0;
+}
+
+static int
+i40e_support_multi_driver(struct rte_eth_dev *dev)
+{
+   struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+   static const char *const valid_keys[] = {
+   ETH_I40E_SUPPORT_MULTI_DRIVER, NULL};
+   struct rte_kvargs *kvlist;
+
+   /* Enable global configuration by default */
+   pf->support_multi_driver = false;
+
+   if (!dev->device->devargs)
+   return 0;
+
+   kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
+   if (!kvlist)
+   return -EINVAL;
+
+   if (rte_kvargs_count(kvlist, ETH_I40E_SUPPORT_MULTI_DRIVER) > 1)
+   PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and only "
+   "the first invalid or last valid one is used !",
+   ETH_I40E_SUPPORT_MULTI_DRIVER);
+
+   rte_kvargs_process(kvlist, ETH_I40E_SUPPORT_MULTI_DRIVER,
+  i40e_parse_multi_drv_handler, pf);
+   rte_kvargs_free(kvlist);
+   return 0;
+}
+
 static int
 eth_i40e_dev_init(struct rte_eth_dev *dev)
 {
@@ -1092,6 +1150,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
hw->bus.func = pci_dev->addr.function;
hw->adapter_stopped = 0;
 
+   /* Check if need to support multi-driver */
+   i40e_support_multi_driver(dev);
+
/* Make sure all is clean before doing PF reset */
i40e_clear_hw(hw);
 
@@ -1119,7 +1180,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
 * for packet type of QinQ by software.
 * It should be removed once issues are fixed in NVM.
 */
-   i40e_GLQF_reg_init(hw);
+   if (!pf->support_multi_driver)
+   i40e_GLQF_reg_init(hw);
 
/* Initialize the input set for filters (hash and fd) to default value 
*/
i40e_filter_input_set_init(pf);
@@ -1139,13 +1201,17 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
 (hw->nvm.version & 0xf), hw->nvm.eetrack);
 
/* initialise the L3_MAP register */
-   ret = i40e_aq_debug_write_register(hw, I40E_GLQF_L3_MAP(40),
-  0x0028,  NULL);
-   if (ret)
-   PMD_INIT_LOG(ERR, "Failed to write L3 MAP register %d", ret);
-   PMD_INIT_LOG(DEBUG, "Global register 0x%08x is changed with value 0x28",
-I40E_GLQF_L3_MAP(40));
-   i40e_global_cfg_warning(I40E_WARNING_QINQ_CLOUD_FILTER);
+   if (!pf->support_multi_driver) {
+   ret = i40e_aq_debug_write_register(hw, I40E_GLQF_L3_MAP(40),
+  0x0028,  NULL);
+   if (ret)
+   PMD_INIT_LOG(ERR, "Failed to write L3 MAP register %d",
+  

[dpdk-dev] [PATCH v4 4/4] net/i40e: fix interrupt conflict when using multi-driver

2018-02-02 Thread Beilei Xing
There is an interrupt conflict when using DPDK and Linux i40e
on different ports of the same Ethernet controller. This
patch fixes it by switching from IntN to Int0 if multiple
drivers are used.

Fixes: be6c228d4da3 ("i40e: support Rx interrupt")

Signed-off-by: Beilei Xing 
---
 drivers/net/i40e/i40e_ethdev.c| 89 +--
 drivers/net/i40e/i40e_ethdev.h| 14 +++---
 drivers/net/i40e/i40e_ethdev_vf.c |  4 +-
 3 files changed, 68 insertions(+), 39 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index bede5c5..149b98a 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -692,6 +692,23 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
i40e_global_cfg_warning(I40E_WARNING_QINQ_PARSER);
 }
 
+static inline void i40e_config_automask(struct i40e_pf *pf)
+{
+   struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+   uint32_t val;
+
+   /* INTENA flag is not auto-cleared for interrupt */
+   val = I40E_READ_REG(hw, I40E_GLINT_CTL);
+   val |= I40E_GLINT_CTL_DIS_AUTOMASK_PF0_MASK |
+   I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK;
+
+   /* If support multi-driver, PF will use INT0. */
+   if (!pf->support_multi_driver)
+   val |= I40E_GLINT_CTL_DIS_AUTOMASK_N_MASK;
+
+   I40E_WRITE_REG(hw, I40E_GLINT_CTL, val);
+}
+
 #define I40E_FLOW_CONTROL_ETHERTYPE  0x8808
 
 /*
@@ -1173,6 +1190,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
return ret;
}
 
+   i40e_config_automask(pf);
+
i40e_set_default_pctype_table(dev);
 
/*
@@ -1705,6 +1724,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t 
msix_vect,
int i;
uint32_t val;
struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
+   struct i40e_pf *pf = I40E_VSI_TO_PF(vsi);
 
/* Bind all RX queues to allocated MSIX interrupt */
for (i = 0; i < nb_queue; i++) {
@@ -1723,7 +1743,8 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t 
msix_vect,
/* Write first RX queue to Link list register as the head element */
if (vsi->type != I40E_VSI_SRIOV) {
uint16_t interval =
-   i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1);
+   i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
+  pf->support_multi_driver);
 
if (msix_vect == I40E_MISC_VEC_ID) {
I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0,
@@ -1782,7 +1803,6 @@ i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t 
itr_idx)
uint16_t nb_msix = RTE_MIN(vsi->nb_msix, intr_handle->nb_efd);
uint16_t queue_idx = 0;
int record = 0;
-   uint32_t val;
int i;
 
for (i = 0; i < vsi->nb_qps; i++) {
@@ -1790,13 +1810,6 @@ i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t 
itr_idx)
I40E_WRITE_REG(hw, I40E_QINT_RQCTL(vsi->base_queue + i), 0);
}
 
-   /* INTENA flag is not auto-cleared for interrupt */
-   val = I40E_READ_REG(hw, I40E_GLINT_CTL);
-   val |= I40E_GLINT_CTL_DIS_AUTOMASK_PF0_MASK |
-   I40E_GLINT_CTL_DIS_AUTOMASK_N_MASK |
-   I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK;
-   I40E_WRITE_REG(hw, I40E_GLINT_CTL, val);
-
/* VF bind interrupt */
if (vsi->type == I40E_VSI_SRIOV) {
__vsi_queues_bind_intr(vsi, msix_vect,
@@ -1853,27 +1866,22 @@ i40e_vsi_enable_queues_intr(struct i40e_vsi *vsi)
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
-   uint16_t interval = i40e_calc_itr_interval(\
-   RTE_LIBRTE_I40E_ITR_INTERVAL, 1);
+   struct i40e_pf *pf = I40E_VSI_TO_PF(vsi);
uint16_t msix_intr, i;
 
-   if (rte_intr_allow_others(intr_handle))
+   if (rte_intr_allow_others(intr_handle) || !pf->support_multi_driver)
for (i = 0; i < vsi->nb_msix; i++) {
msix_intr = vsi->msix_intr + i;
I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(msix_intr - 1),
I40E_PFINT_DYN_CTLN_INTENA_MASK |
I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
-   (0 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
-   (interval <<
-I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT));
+   I40E_PFINT_DYN_CTLN_ITR_INDX_MASK);
}
else
I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0,
   I40E_PFINT_DYN_CTL0_INTENA_MASK |
   I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
-  (0 << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT) |
-  (interval <<
-   

Re: [dpdk-dev] [PATCH] net/i40e: fix VF testpmd startup failure issue

2018-02-02 Thread Zhang, Helin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Xing, Beilei
> Sent: Friday, February 2, 2018 6:44 PM
> To: Li, Xiaoyun
> Cc: dev@dpdk.org; sta...@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/i40e: fix VF testpmd startup failure issue
> 
> 
> 
> > -Original Message-
> > From: Li, Xiaoyun
> > Sent: Friday, February 2, 2018 1:45 PM
> > To: Xing, Beilei 
> > Cc: dev@dpdk.org; Li, Xiaoyun ; sta...@dpdk.org
> > Subject: [PATCH] net/i40e: fix VF testpmd startup failure issue
> >
> > New testpmd will get CRC strip offload from rx_offload_capa. I40evf
> > cannot disable CRC strip. And in fact, it is enabled by PF. This patch
> > solves the issue by adding CRC strip flag into rx_offload_capa in i40e and
> i40evf.
> >
> > Fixes: 8b9bd0efe0b6 ("app/testpmd: disable Rx VLAN offloads by
> > default")
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Xiaoyun Li 
> > ---
> >  drivers/net/i40e/i40e_ethdev.c| 3 ++-
> >  drivers/net/i40e/i40e_ethdev_vf.c | 3 ++-
> >  2 files changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 7e3d1a8..403831d 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -3083,7 +3083,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev,
> > struct rte_eth_dev_info *dev_info)
> > DEV_RX_OFFLOAD_QINQ_STRIP |
> > DEV_RX_OFFLOAD_IPV4_CKSUM |
> > DEV_RX_OFFLOAD_UDP_CKSUM |
> > -   DEV_RX_OFFLOAD_TCP_CKSUM;
> > +   DEV_RX_OFFLOAD_TCP_CKSUM |
> > +   DEV_RX_OFFLOAD_CRC_STRIP;
> > dev_info->tx_offload_capa =
> > DEV_TX_OFFLOAD_VLAN_INSERT |
> > DEV_TX_OFFLOAD_QINQ_INSERT |
> > diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
> > b/drivers/net/i40e/i40e_ethdev_vf.c
> > index 57f7613..169e1b1 100644
> > --- a/drivers/net/i40e/i40e_ethdev_vf.c
> > +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> > @@ -2194,7 +2194,8 @@ i40evf_dev_info_get(struct rte_eth_dev *dev,
> > struct rte_eth_dev_info *dev_info)
> > DEV_RX_OFFLOAD_QINQ_STRIP |
> > DEV_RX_OFFLOAD_IPV4_CKSUM |
> > DEV_RX_OFFLOAD_UDP_CKSUM |
> > -   DEV_RX_OFFLOAD_TCP_CKSUM;
> > +   DEV_RX_OFFLOAD_TCP_CKSUM |
> > +   DEV_RX_OFFLOAD_CRC_STRIP;
> > dev_info->tx_offload_capa =
> > DEV_TX_OFFLOAD_VLAN_INSERT |
> > DEV_TX_OFFLOAD_QINQ_INSERT |
> > --
> > 2.7.4
> 
> Acked-by: Beilei Xing , thanks.
Applied to dpdk-next-net-intel, with minor commit log changes. Thanks!

/Helin



Re: [dpdk-dev] [PATCH] event/rx_adapter: fix ignore return of event start

2018-02-02 Thread Jerin Jacob
-Original Message-
> Date: Fri, 2 Feb 2018 10:04:20 +
> From: "Rao, Nikhil" 
> To: "Varghese, Vipin" , Jerin Jacob
>  
> CC: "dev@dpdk.org" , "Jacob,  Jerin"
>  , "Van Haaren, Harry"
>  , Hemant Agrawal ,
>  "Jain, Deepak K" 
> Subject: RE: [PATCH] event/rx_adapter: fix ignore return of event start
> 
> 
> > -Original Message-
> > From: Varghese, Vipin
> > Sent: Friday, February 2, 2018 1:39 PM
> > To: Jerin Jacob ; Rao, Nikhil
> > 
> > Cc: dev@dpdk.org; Jacob, Jerin ; Van
> > Haaren, Harry ; Hemant Agrawal
> > ; Jain, Deepak K 
> > Subject: RE: [PATCH] event/rx_adapter: fix ignore return of event start
> > 
> > 
> > 
> > > -Original Message-
> > > From: Jerin Jacob [mailto:jerin.ja...@caviumnetworks.com]
> > > Sent: Wednesday, January 31, 2018 6:54 AM
> > > To: Rao, Nikhil 
> > > Cc: Varghese, Vipin ; dev@dpdk.org; Jacob,
> > > Jerin ; Van Haaren, Harry
> > > ; Hemant Agrawal
> > ;
> > > Jain, Deepak K 
> > > Subject: Re: [PATCH] event/rx_adapter: fix ignore return of event
> > > start
> > >
> > > -Original Message-
> > > >
> > > >
> > > > Adding eventdev PMD folks for their suggestions on how to handle the
> > > > return
> > > value from rte_event_dev_start() below.
> > > >
> > > > > -Original Message-
> > > > > From: Varghese, Vipin
> > > > > Sent: Wednesday, January 31, 2018 4:26 AM
> > > > > To: dev@dpdk.org; Rao, Nikhil 
> > > > > Cc: Jain, Deepak K ; Varghese, Vipin
> > > > > 
> > > > > Subject: [PATCH] event/rx_adapter: fix ignore return of event
> > > > > start
> > > > >
> > > > > Capture the return value for rte_event_dev_start. Return the
> > > > > result back to user.
> > > > >
> > > > > Coverity issue: 257000
> > > > > Fixes: 9c38b704d280 ("eventdev: add eth Rx adapter
> > > > > implementation")
> > > > > Cc: nikhil@intel.com
> > > > >
> > > > > Signed-off-by: Vipin Varghese 
> > > > > ---
> > > > >  lib/librte_eventdev/rte_event_eth_rx_adapter.c | 4 ++--
> > > > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > > > b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > > > index 90106e6..a818bef 100644
> > > > > --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > > > +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
> > > > > @@ -603,7 +603,7 @@ static uint16_t gcd_u16(uint16_t a, uint16_t b)
> > > > >   RTE_EDEV_LOG_ERR("failed to configure event dev %u\n",
> > > > >   dev_id);
> > > > >   if (started)
> > > > > - rte_event_dev_start(dev_id);
> > > > > + ret = rte_event_dev_start(dev_id);
> > > >
> > > > Currently the a non-zero return value at this point signifies an
> > > > error returned
> > > from rte_event_dev_configure(),  so I suggest that the return value is
> > > typecasted to void.
> > >
> > > If I understand it correctly, Any one of the failure(configure() or
> > > start()) should result in bad state. Right?
> > > i.e If some reason PMD is not able to start() even after failure
> > > configuration() would result in bad state.
> > > If so, one option could be combine the error like ret |= operation or
> > > create a new logical error in Rx adapter which denotes this new error.
> > >
> > 
> > So do we agree to ACK these changes to get the code fix to the mainline? 
> 
> Sorry, if my original email wasn't clear,  if rte_event_dev_configure() 
> returns an error and rte_eventdev_start() returns success that would be a 
> problem, i.e., the fix is incorrect.
> 
> Of the 2 options suggested by Jerin - Since ret is not a bitmask  ret |= 
> wouldn't work, if I understand the option correctly . A new error would work.
> 
> How about EIO ? and we also update the documentation to indicate that the 
> event device would be in a stopped state if the return code is EIO.

+1 for new error. You may consider EBUSY or EINPROGRESS also. No strong
opinion on the name.



[dpdk-dev] [PATCH v3 2/4] net/i40e: add debug logs when writing global registers

2018-02-02 Thread Beilei Xing
Add debug logs when writing global registers.

Signed-off-by: Beilei Xing 
Cc: sta...@dpdk.org
---
 drivers/net/i40e/i40e_ethdev.c | 127 +
 drivers/net/i40e/i40e_ethdev.h |   8 +++
 2 files changed, 87 insertions(+), 48 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 44821f2..ef23241 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -716,6 +716,15 @@ rte_i40e_dev_atomic_write_link_status(struct rte_eth_dev 
*dev,
return 0;
 }
 
+static inline void
+i40e_write_global_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val)
+{
+   i40e_write_rx_ctl(hw, reg_addr, reg_val);
+   PMD_DRV_LOG(DEBUG, "Global register 0x%08x is modified "
+   "with value 0x%08x",
+   reg_addr, reg_val);
+}
+
 RTE_PMD_REGISTER_PCI(net_i40e, rte_i40e_pmd.pci_drv);
 RTE_PMD_REGISTER_PCI_TABLE(net_i40e, pci_id_i40e_map);
 
@@ -735,9 +744,9 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
 * configuration API is added to avoid configuration conflicts
 * between ports of the same device.
 */
-   I40E_WRITE_REG(hw, I40E_GLQF_ORT(33), 0x00E0);
-   I40E_WRITE_REG(hw, I40E_GLQF_ORT(34), 0x00E3);
-   I40E_WRITE_REG(hw, I40E_GLQF_ORT(35), 0x00E6);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_ORT(33), 0x00E0);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_ORT(34), 0x00E3);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_ORT(35), 0x00E6);
i40e_global_cfg_warning(I40E_WARNING_ENA_FLX_PLD);
 
/*
@@ -746,8 +755,8 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
 * configuration API is added to avoid configuration conflicts
 * between ports of the same device.
 */
-   I40E_WRITE_REG(hw, I40E_GLQF_ORT(40), 0x0029);
-   I40E_WRITE_REG(hw, I40E_GLQF_PIT(9), 0x9420);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_ORT(40), 0x0029);
+   I40E_WRITE_GLB_REG(hw, I40E_GLQF_PIT(9), 0x9420);
i40e_global_cfg_warning(I40E_WARNING_QINQ_PARSER);
 }
 
@@ -2799,8 +2808,9 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev,
"I40E_GL_SWT_L2TAGCTRL[%d]", reg_id);
return ret;
}
-   PMD_DRV_LOG(DEBUG, "Debug write 0x%08"PRIx64" to "
-   "I40E_GL_SWT_L2TAGCTRL[%d]", reg_w, reg_id);
+   PMD_DRV_LOG(DEBUG,
+   "Global register 0x%08x is changed with value 0x%08x",
+   I40E_GL_SWT_L2TAGCTRL(reg_id), (uint32_t)reg_w);
 
i40e_global_cfg_warning(I40E_WARNING_TPID);
 
@@ -3030,16 +3040,16 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
}
 
/* config the water marker both based on the packets and bytes */
-   I40E_WRITE_REG(hw, I40E_GLRPB_PHW,
+   I40E_WRITE_GLB_REG(hw, I40E_GLRPB_PHW,
   (pf->fc_conf.high_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT) / I40E_PACKET_AVERAGE_SIZE);
-   I40E_WRITE_REG(hw, I40E_GLRPB_PLW,
+   I40E_WRITE_GLB_REG(hw, I40E_GLRPB_PLW,
   (pf->fc_conf.low_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT) / I40E_PACKET_AVERAGE_SIZE);
-   I40E_WRITE_REG(hw, I40E_GLRPB_GHW,
+   I40E_WRITE_GLB_REG(hw, I40E_GLRPB_GHW,
   pf->fc_conf.high_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT);
-   I40E_WRITE_REG(hw, I40E_GLRPB_GLW,
+   I40E_WRITE_GLB_REG(hw, I40E_GLRPB_GLW,
   pf->fc_conf.low_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT);
i40e_global_cfg_warning(I40E_WARNING_FLOW_CTL);
@@ -6880,6 +6890,9 @@ i40e_dev_set_gre_key_len(struct i40e_hw *hw, uint8_t len)
   reg, NULL);
if (ret != 0)
return ret;
+   PMD_DRV_LOG(DEBUG, "Global register 0x%08x is changed "
+   "with value 0x%08x",
+   I40E_GL_PRS_FVBM(2), reg);
i40e_global_cfg_warning(I40E_WARNING_GRE_KEY_LEN);
} else {
ret = 0;
@@ -7124,41 +7137,43 @@ i40e_set_hash_filter_global_config(struct i40e_hw *hw,
I40E_GLQF_HSYM_SYMH_ENA_MASK : 0;
if (hw->mac.type == I40E_MAC_X722) {
if (pctype == I40E_FILTER_PCTYPE_NONF_IPV4_UDP) {
-   i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
+   i40e_write_global_rx_ctl(hw, I40E_GLQF_HSYM(
  I40E_FILTER_PCTYPE_NONF_IPV4_UDP), reg);
-   i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
+   i40e_write_global_rx_ctl(hw, I40E_GLQF_HSYM(
  I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP),
 

[dpdk-dev] [PATCH v3 3/4] net/i40e: fix multiple driver support issue

2018-02-02 Thread Beilei Xing
This patch provides the option to disable writing some global registers
in the PMD, in order to avoid affecting other drivers when multiple
drivers run on the same NIC and control different physical ports. This
is needed because a few global resources are shared among the different
physical ports.

Fixes: ec246eeb5da1 ("i40e: use default filter input set on init")
Fixes: 98f055707685 ("i40e: configure input fields for RSS or flow director")
Fixes: f05ec7d77e41 ("i40e: initialize flow director flexible payload setting")
Fixes: e536c2e32883 ("net/i40e: fix parsing QinQ packets type")
Fixes: 19b16e2f6442 ("ethdev: add vlan type when setting ether type")
Cc: sta...@dpdk.org

Signed-off-by: Beilei Xing 
---
 drivers/net/i40e/i40e_ethdev.c | 215 -
 drivers/net/i40e/i40e_ethdev.h |   2 +
 2 files changed, 171 insertions(+), 46 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index ef23241..ae0f31a 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -944,6 +944,67 @@ config_floating_veb(struct rte_eth_dev *dev)
 #define I40E_L2_TAGS_S_TAG_SHIFT 1
 #define I40E_L2_TAGS_S_TAG_MASK I40E_MASK(0x1, I40E_L2_TAGS_S_TAG_SHIFT)
 
+#define ETH_I40E_SUPPORT_MULTI_DRIVER  "support-multi-driver"
+RTE_PMD_REGISTER_PARAM_STRING(net_i40e,
+ ETH_I40E_SUPPORT_MULTI_DRIVER "=0|1");
+
+static int
+i40e_parse_multi_drv_handler(__rte_unused const char *key,
+ const char *value,
+ void *opaque)
+{
+   struct i40e_pf *pf;
+   unsigned long support_multi_driver;
+   char *end;
+
+   pf = (struct i40e_pf *)opaque;
+
+   errno = 0;
+   support_multi_driver = strtoul(value, &end, 10);
+   if (errno != 0 || end == value || *end != 0) {
+   PMD_DRV_LOG(WARNING, "Wrong global configuration");
+   return -(EINVAL);
+   }
+
+   if (support_multi_driver == 1 || support_multi_driver == 0)
+   pf->support_multi_driver = (bool)support_multi_driver;
+   else
+   PMD_DRV_LOG(WARNING, "%s must be 1 or 0,",
+   "enable global configuration by default."
+   ETH_I40E_SUPPORT_MULTI_DRIVER);
+   return 0;
+}
+
+static int
+i40e_support_multi_driver(struct rte_eth_dev *dev)
+{
+   struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+   struct rte_pci_device *pci_dev = dev->pci_dev;
+   static const char *valid_keys[] = {
+   ETH_I40E_SUPPORT_MULTI_DRIVER, NULL};
+   struct rte_kvargs *kvlist;
+
+   /* Enable global configuration by default */
+   pf->support_multi_driver = false;
+
+   if (!pci_dev->device.devargs)
+   return 0;
+
+   kvlist = rte_kvargs_parse(pci_dev->device.devargs->args, valid_keys);
+   if (!kvlist)
+   return -EINVAL;
+
+   if (rte_kvargs_count(kvlist, ETH_I40E_SUPPORT_MULTI_DRIVER) > 1)
+   PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and only "
+   "the first invalid or last valid one is used !",
+   ETH_I40E_SUPPORT_MULTI_DRIVER);
+
+   rte_kvargs_process(kvlist, ETH_I40E_SUPPORT_MULTI_DRIVER,
+  i40e_parse_multi_drv_handler, pf);
+   rte_kvargs_free(kvlist);
+   return 0;
+}
+
 static int
 eth_i40e_dev_init(struct rte_eth_dev *dev)
 {
@@ -993,6 +1054,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
hw->bus.func = pci_dev->addr.function;
hw->adapter_stopped = 0;
 
+   /* Check if need to support multi-driver */
+   i40e_support_multi_driver(dev);
+
/* Make sure all is clean before doing PF reset */
i40e_clear_hw(hw);
 
@@ -1019,7 +1083,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
 * software. It should be removed once issues are fixed
 * in NVM.
 */
-   i40e_GLQF_reg_init(hw);
+   if (!pf->support_multi_driver)
+   i40e_GLQF_reg_init(hw);
 
/* Initialize the input set for filters (hash and fd) to default value 
*/
i40e_filter_input_set_init(pf);
@@ -1115,11 +1180,14 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
i40e_set_fc(hw, &aq_fail, TRUE);
 
/* Set the global registers with default ether type value */
-   ret = i40e_vlan_tpid_set(dev, ETH_VLAN_TYPE_OUTER, ETHER_TYPE_VLAN);
-   if (ret != I40E_SUCCESS) {
-   PMD_INIT_LOG(ERR, "Failed to set the default outer "
-"VLAN ether type");
-   goto err_setup_pf_switch;
+   if (!pf->support_multi_driver) {
+   ret = i40e_vlan_tpid_set(dev, ETH_VLAN_TYPE_OUTER,
+ETHER_TYPE_VLAN);
+   if (ret != I40E_SUCCESS) {
+   PMD_INIT_LOG(ERR, "Failed to set the default outer "
+"VLAN ether type");
+  

[dpdk-dev] [PATCH v3 4/4] net/i40e: fix interrupt conflict when using multi-driver

2018-02-02 Thread Beilei Xing
There is an interrupt conflict when using DPDK and Linux i40e
on different ports of the same Ethernet controller. This
patch fixes it by switching from IntN to Int0 if multiple
drivers are used.

Fixes: be6c228d4da3 ("i40e: support Rx interrupt")
Cc: sta...@dpdk.org

Signed-off-by: Beilei Xing 
---
 drivers/net/i40e/i40e_ethdev.c| 93 +--
 drivers/net/i40e/i40e_ethdev.h| 10 +++--
 drivers/net/i40e/i40e_ethdev_vf.c |  4 +-
 3 files changed, 68 insertions(+), 39 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index ae0f31a..cae22e7 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -760,6 +760,23 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
i40e_global_cfg_warning(I40E_WARNING_QINQ_PARSER);
 }
 
+static inline void i40e_config_automask(struct i40e_pf *pf)
+{
+   struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+   uint32_t val;
+
+   /* INTENA flag is not auto-cleared for interrupt */
+   val = I40E_READ_REG(hw, I40E_GLINT_CTL);
+   val |= I40E_GLINT_CTL_DIS_AUTOMASK_PF0_MASK |
+   I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK;
+
+   /* If support multi-driver, PF will use INT0. */
+   if (!pf->support_multi_driver)
+   val |= I40E_GLINT_CTL_DIS_AUTOMASK_N_MASK;
+
+   I40E_WRITE_REG(hw, I40E_GLINT_CTL, val);
+}
+
 #define I40E_FLOW_CONTROL_ETHERTYPE  0x8808
 
 /*
@@ -1077,6 +1094,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
return ret;
}
 
+   i40e_config_automask(pf);
+
/*
 * To work around the NVM issue, initialize registers
 * for flexible payload and packet type of QinQ by
@@ -1463,6 +1482,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t 
msix_vect,
int i;
uint32_t val;
struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
+   struct i40e_pf *pf = I40E_VSI_TO_PF(vsi);
 
/* Bind all RX queues to allocated MSIX interrupt */
for (i = 0; i < nb_queue; i++) {
@@ -1481,7 +1501,8 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t 
msix_vect,
/* Write first RX queue to Link list register as the head element */
if (vsi->type != I40E_VSI_SRIOV) {
uint16_t interval =
-   i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL);
+   i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL,
+  pf->support_multi_driver);
 
if (msix_vect == I40E_MISC_VEC_ID) {
I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0,
@@ -1539,7 +1560,6 @@ i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi)
uint16_t nb_msix = RTE_MIN(vsi->nb_msix, intr_handle->nb_efd);
uint16_t queue_idx = 0;
int record = 0;
-   uint32_t val;
int i;
 
for (i = 0; i < vsi->nb_qps; i++) {
@@ -1547,13 +1567,6 @@ i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi)
I40E_WRITE_REG(hw, I40E_QINT_RQCTL(vsi->base_queue + i), 0);
}
 
-   /* INTENA flag is not auto-cleared for interrupt */
-   val = I40E_READ_REG(hw, I40E_GLINT_CTL);
-   val |= I40E_GLINT_CTL_DIS_AUTOMASK_PF0_MASK |
-   I40E_GLINT_CTL_DIS_AUTOMASK_N_MASK |
-   I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK;
-   I40E_WRITE_REG(hw, I40E_GLINT_CTL, val);
-
/* VF bind interrupt */
if (vsi->type == I40E_VSI_SRIOV) {
__vsi_queues_bind_intr(vsi, msix_vect,
@@ -1606,27 +1619,22 @@ i40e_vsi_enable_queues_intr(struct i40e_vsi *vsi)
struct rte_eth_dev *dev = vsi->adapter->eth_dev;
struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle;
struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
-   uint16_t interval = i40e_calc_itr_interval(\
-   RTE_LIBRTE_I40E_ITR_INTERVAL);
+   struct i40e_pf *pf = I40E_VSI_TO_PF(vsi);
uint16_t msix_intr, i;
 
-   if (rte_intr_allow_others(intr_handle))
+   if (rte_intr_allow_others(intr_handle) || !pf->support_multi_driver)
for (i = 0; i < vsi->nb_msix; i++) {
msix_intr = vsi->msix_intr + i;
I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(msix_intr - 1),
-   I40E_PFINT_DYN_CTLN_INTENA_MASK |
-   I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
-   (0 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
-   (interval <<
-I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT));
+  I40E_PFINT_DYN_CTLN_INTENA_MASK |
+  I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK);
}
else
I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0,
   I40E_PFINT_DYN_CTL0_INTENA_MASK |

[dpdk-dev] [PATCH v3 1/4] net/i40e: add warnings when writing global registers

2018-02-02 Thread Beilei Xing
Add warnings when writing global registers.

Signed-off-by: Beilei Xing 
Cc: sta...@dpdk.org
---
 doc/guides/nics/i40e.rst   | 12 
 drivers/net/i40e/i40e_ethdev.c | 15 +++
 drivers/net/i40e/i40e_ethdev.h | 43 ++
 3 files changed, 70 insertions(+)

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 5780268..68a546b 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -459,3 +459,15 @@ Receive packets with Ethertype 0x88A8
 
 Due to the FW limitation, PF can receive packets with Ethertype 0x88A8
 only when floating VEB is disabled.
+
+Global configuration warning
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+I40E PMD will set some global registers to enable some functions or apply some
+configuration. Then when using different ports of the same NIC with Linux kernel
+and DPDK, the port with Linux kernel will be impacted by the port with DPDK.
+For example, register I40E_GL_SWT_L2TAGCTRL is used to control L2 tag, i40e
+PMD uses I40E_GL_SWT_L2TAGCTRL to set vlan TPID. If setting TPID in port A
+with DPDK, then the configuration will also impact port B in the NIC with
+kernel driver, which doesn't want to use the TPID.
+So PMD reports a warning to clarify what is changed by writing global registers.
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 0835c2d..44821f2 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -738,6 +738,7 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
I40E_WRITE_REG(hw, I40E_GLQF_ORT(33), 0x00E0);
I40E_WRITE_REG(hw, I40E_GLQF_ORT(34), 0x00E3);
I40E_WRITE_REG(hw, I40E_GLQF_ORT(35), 0x00E6);
+   i40e_global_cfg_warning(I40E_WARNING_ENA_FLX_PLD);
 
/*
 * Initialize registers for parsing packet type of QinQ
@@ -747,6 +748,7 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
 */
I40E_WRITE_REG(hw, I40E_GLQF_ORT(40), 0x0029);
I40E_WRITE_REG(hw, I40E_GLQF_PIT(9), 0x9420);
+   i40e_global_cfg_warning(I40E_WARNING_QINQ_PARSER);
 }
 
 #define I40E_FLOW_CONTROL_ETHERTYPE  0x8808
@@ -2800,6 +2802,8 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev,
PMD_DRV_LOG(DEBUG, "Debug write 0x%08"PRIx64" to "
"I40E_GL_SWT_L2TAGCTRL[%d]", reg_w, reg_id);
 
+   i40e_global_cfg_warning(I40E_WARNING_TPID);
+
return ret;
 }
 
@@ -3038,6 +3042,7 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
I40E_WRITE_REG(hw, I40E_GLRPB_GLW,
   pf->fc_conf.low_water[I40E_MAX_TRAFFIC_CLASS]
   << I40E_KILOSHIFT);
+   i40e_global_cfg_warning(I40E_WARNING_FLOW_CTL);
 
I40E_WRITE_FLUSH(hw);
 
@@ -6875,6 +6880,7 @@ i40e_dev_set_gre_key_len(struct i40e_hw *hw, uint8_t len)
   reg, NULL);
if (ret != 0)
return ret;
+   i40e_global_cfg_warning(I40E_WARNING_GRE_KEY_LEN);
} else {
ret = 0;
}
@@ -7154,6 +7160,7 @@ i40e_set_hash_filter_global_config(struct i40e_hw *hw,
} else {
i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(pctype), reg);
}
+   i40e_global_cfg_warning(I40E_WARNING_HSYM);
}
 
reg = i40e_read_rx_ctl(hw, I40E_GLQF_CTL);
@@ -7178,6 +7185,7 @@ i40e_set_hash_filter_global_config(struct i40e_hw *hw,
goto out;
 
i40e_write_rx_ctl(hw, I40E_GLQF_CTL, reg);
+   i40e_global_cfg_warning(I40E_WARNING_QF_CTL);
 
 out:
I40E_WRITE_FLUSH(hw);
@@ -7848,6 +7856,10 @@ i40e_filter_input_set_init(struct i40e_pf *pf)
pf->hash_input_set[pctype] = input_set;
pf->fdir.input_set[pctype] = input_set;
}
+
+   i40e_global_cfg_warning(I40E_WARNING_HASH_INSET);
+   i40e_global_cfg_warning(I40E_WARNING_FD_MSK);
+   i40e_global_cfg_warning(I40E_WARNING_HASH_MSK);
 }
 
 int
@@ -7913,6 +7925,7 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw,
i40e_check_write_reg(hw, I40E_GLQF_HASH_INSET(1, pctype),
 (uint32_t)((inset_reg >>
 I40E_32_BIT_WIDTH) & UINT32_MAX));
+   i40e_global_cfg_warning(I40E_WARNING_HASH_INSET);
 
for (i = 0; i < num; i++)
i40e_check_write_reg(hw, I40E_GLQF_HASH_MSK(i, pctype),
@@ -7921,6 +7934,7 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw,
for (i = num; i < I40E_INSET_MASK_NUM_REG; i++)
i40e_check_write_reg(hw, I40E_GLQF_HASH_MSK(i, pctype),
 0);
+   i40e_global_cfg_warning(I40E_WARNING_HASH_MSK);
I40E_WRITE_FLUSH(hw);
 
pf->hash_input_set[pctype] = input_set;
@@ -7999,6 +8013,7 @@ i40e_fdir_filter_inset_select(struct i40e_pf *pf,
for (i = num; i < I40E_INSET_MASK_NUM_REG; i++)

[dpdk-dev] [PATCH v3 0/4] net/i40e: fix multiple driver support issue

2018-02-02 Thread Beilei Xing
DPDK i40e PMD will modify some global registers during initialization
and post initialization, there'll be impact during use of 700 series
Ethernet Adapter with both Linux kernel and DPDK PMD.
This patchset adds logs for global configuration and adds device args
to disable global configuration and change interrupt for PF.

This patchset is based on 16.11.4 LTS.
Commit id: 516447a5056c093e4d020011a69216b453576782

v3 changes:
 - Fix interrupt conflict when using multiple driver.

v2 changes:
 - Add warning logs and debug logs.
 

Beilei Xing (4):
  net/i40e: add warnings when writing global registers
  net/i40e: add debug logs when writing global registers
  net/i40e: fix multiple driver support issue
  net/i40e: fix interrupt conflict when using multi-driver

 doc/guides/nics/i40e.rst  |  12 ++
 drivers/net/i40e/i40e_ethdev.c| 412 --
 drivers/net/i40e/i40e_ethdev.h|  63 +-
 drivers/net/i40e/i40e_ethdev_vf.c |   4 +-
 4 files changed, 377 insertions(+), 114 deletions(-)

-- 
2.5.5



Re: [dpdk-dev] [PATCH v4 0/4] net/i40e: fix multiple driver support issue

2018-02-02 Thread Wu, Jingjing


> -Original Message-
> From: Xing, Beilei
> Sent: Friday, February 2, 2018 8:06 PM
> To: dev@dpdk.org; Wu, Jingjing 
> Subject: [PATCH v4 0/4] net/i40e: fix multiple driver support issue
> 
> DPDK i40e PMD will modify some global registers during initialization
> and post initialization, there'll be impact during use of 700 series
> Ethernet Adapter with both Linux kernel and DPDK PMD.
> This patchset adds log for global configuration and adds device args
> to disable global configuration and change interrupt for PF.
> 
> v4 changes:
>  - Fix interrupt conflict when using multiple driver.
> 
> v3 changes:
>  - Reword commit log.
> 
> v2 changes:
>  - Add debug log when writing global registers
>   - Add option to disable writing global registers
> 
> 
> Beilei Xing (4):
>   net/i40e: add warnings when writing global registers
>   net/i40e: add debug logs when writing global registers
>   net/i40e: fix multiple driver support issue
>   net/i40e: fix interrupt conflict when using multi-driver
> 
>  doc/guides/nics/i40e.rst  |  12 +
>  drivers/net/i40e/i40e_ethdev.c| 477 
> ++
>  drivers/net/i40e/i40e_ethdev.h|  69 +-
>  drivers/net/i40e/i40e_ethdev_vf.c |   4 +-
>  drivers/net/i40e/i40e_fdir.c  |  40 ++--
>  drivers/net/i40e/i40e_flow.c  |   9 +
>  6 files changed, 487 insertions(+), 124 deletions(-)
> 
> --
> 2.5.5

Acked-by: Jingjing Wu 



[dpdk-dev] [PATCH 1/2] net/sfc: rename version map file

2018-02-02 Thread Andrew Rybchenko
From: Ivan Malov 

The version map filename does not comply with the format
used by meson build rules for drivers (i.e. on the upper
level) and needs to be revisited. This patch removes efx
postfix from the driver title in the filename.

Signed-off-by: Ivan Malov 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/Makefile| 2 +-
 drivers/net/sfc/rte_pmd_sfc_efx_version.map | 4 
 drivers/net/sfc/rte_pmd_sfc_version.map | 4 
 3 files changed, 5 insertions(+), 5 deletions(-)
 delete mode 100644 drivers/net/sfc/rte_pmd_sfc_efx_version.map
 create mode 100644 drivers/net/sfc/rte_pmd_sfc_version.map

diff --git a/drivers/net/sfc/Makefile b/drivers/net/sfc/Makefile
index cc4e4e5..8a671dd 100644
--- a/drivers/net/sfc/Makefile
+++ b/drivers/net/sfc/Makefile
@@ -60,7 +60,7 @@ BASE_DRIVER_OBJS=$(sort $(patsubst %.c,%.o,$(notdir 
$(wildcard $(SRCDIR)/base/*.
 $(foreach obj, $(BASE_DRIVER_OBJS), \
   $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
 
-EXPORT_MAP := rte_pmd_sfc_efx_version.map
+EXPORT_MAP := rte_pmd_sfc_version.map
 
 LIBABIVER := 1
 
diff --git a/drivers/net/sfc/rte_pmd_sfc_efx_version.map 
b/drivers/net/sfc/rte_pmd_sfc_efx_version.map
deleted file mode 100644
index 31eca32..000
--- a/drivers/net/sfc/rte_pmd_sfc_efx_version.map
+++ /dev/null
@@ -1,4 +0,0 @@
-DPDK_17.02 {
-
-   local: *;
-};
diff --git a/drivers/net/sfc/rte_pmd_sfc_version.map 
b/drivers/net/sfc/rte_pmd_sfc_version.map
new file mode 100644
index 000..31eca32
--- /dev/null
+++ b/drivers/net/sfc/rte_pmd_sfc_version.map
@@ -0,0 +1,4 @@
+DPDK_17.02 {
+
+   local: *;
+};
-- 
2.7.4



[dpdk-dev] [PATCH 2/2] net/sfc: add support for meson build

2018-02-02 Thread Andrew Rybchenko
From: Ivan Malov 

Signed-off-by: Ivan Malov 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/meson.build  |  2 +-
 drivers/net/sfc/base/meson.build | 69 
 drivers/net/sfc/meson.build  | 61 +++
 3 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/sfc/base/meson.build
 create mode 100644 drivers/net/sfc/meson.build

diff --git a/drivers/net/meson.build b/drivers/net/meson.build
index f19a586..704cbe3 100644
--- a/drivers/net/meson.build
+++ b/drivers/net/meson.build
@@ -4,7 +4,7 @@
 drivers = ['af_packet', 'bonding',
'e1000', 'fm10k', 'i40e', 'ixgbe',
'null', 'octeontx', 'pcap', 'ring',
-   'thunderx']
+   'sfc', 'thunderx']
 std_deps = ['ethdev', 'kvargs'] # 'ethdev' also pulls in mbuf, net, eal etc
 std_deps += ['bus_pci'] # very many PMDs depend on PCI, so make std
 std_deps += ['bus_vdev']# same with vdev bus
diff --git a/drivers/net/sfc/base/meson.build b/drivers/net/sfc/base/meson.build
new file mode 100644
index 000..fad4d4c
--- /dev/null
+++ b/drivers/net/sfc/base/meson.build
@@ -0,0 +1,69 @@
+# Copyright (c) 2016-2018 Solarflare Communications Inc.
+# All rights reserved.
+#
+# This software was jointly developed between OKTET Labs (under contract
+# for Solarflare) and Solarflare Communications, Inc.
+
+sources = [
+   'efx_bootcfg.c',
+   'efx_crc32.c',
+   'efx_ev.c',
+   'efx_filter.c',
+   'efx_hash.c',
+   'efx_intr.c',
+   'efx_lic.c',
+   'efx_mac.c',
+   'efx_mcdi.c',
+   'efx_mon.c',
+   'efx_nic.c',
+   'efx_nvram.c',
+   'efx_phy.c',
+   'efx_port.c',
+   'efx_rx.c',
+   'efx_sram.c',
+   'efx_tunnel.c',
+   'efx_tx.c',
+   'efx_vpd.c',
+   'mcdi_mon.c',
+   'siena_mac.c',
+   'siena_mcdi.c',
+   'siena_nic.c',
+   'siena_nvram.c',
+   'siena_phy.c',
+   'siena_sram.c',
+   'siena_vpd.c',
+   'ef10_ev.c',
+   'ef10_filter.c',
+   'ef10_intr.c',
+   'ef10_mac.c',
+   'ef10_mcdi.c',
+   'ef10_nic.c',
+   'ef10_nvram.c',
+   'ef10_phy.c',
+   'ef10_rx.c',
+   'ef10_tx.c',
+   'ef10_vpd.c',
+   'hunt_nic.c',
+   'medford_nic.c'
+]
+
+extra_flags = [
+   '-Wno-sign-compare',
+   '-Wno-unused-parameter',
+   '-Wno-unused-variable',
+   '-Wno-empty-body',
+   '-Wno-unused-but-set-variable'
+]
+
+c_args = cflags
+foreach flag: extra_flags
+   if cc.has_argument(flag)
+   c_args += flag
+   endif
+endforeach
+
+base_lib = static_library('sfc_base', sources,
+   dependencies: static_rte_eal,
+   c_args: c_args)
+
+base_objs = base_lib.extract_all_objects()
diff --git a/drivers/net/sfc/meson.build b/drivers/net/sfc/meson.build
new file mode 100644
index 000..b603579
--- /dev/null
+++ b/drivers/net/sfc/meson.build
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Copyright (c) 2016-2018 Solarflare Communications Inc.
+# All rights reserved.
+#
+# This software was jointly developed between OKTET Labs (under contract
+# for Solarflare) and Solarflare Communications, Inc.
+
+allow_experimental_apis = true
+
+extra_flags = []
+extra_flags += '-I' + meson.current_source_dir() + '/base'
+extra_flags += '-I' + meson.current_source_dir()
+extra_flags += '-O3'
+
+# Strict-aliasing rules are violated by rte_eth_link to uint64_t casts
+extra_flags += '-Wno-strict-aliasing'
+
+# Enable more warnings
+extra_flags += [
+   '-Wextra',
+   '-Wdisabled-optimization'
+]
+
+# Compiler and version dependent flags
+extra_flags += [
+   '-Waggregate-return',
+   '-Wnested-externs',
+   '-Wbad-function-cast'
+]
+
+# Suppress ICC false positive warning on 'bulk' may be used before its
+# value is set
+extra_flags += '-wd3656'
+
+foreach flag: extra_flags
+   if cc.has_argument(flag)
+   cflags += flag
+   endif
+endforeach
+
+subdir('base')
+objs = [base_objs]
+
+sources = files(
+   'sfc_ethdev.c',
+   'sfc_kvargs.c',
+   'sfc.c',
+   'sfc_mcdi.c',
+   'sfc_intr.c',
+   'sfc_ev.c',
+   'sfc_port.c',
+   'sfc_rx.c',
+   'sfc_tx.c',
+   'sfc_tso.c',
+   'sfc_filter.c',
+   'sfc_flow.c',
+   'sfc_dp.c',
+   'sfc_ef10_rx.c',
+   'sfc_ef10_tx.c'
+)
-- 
2.7.4



Re: [dpdk-dev] [PATCH] pmdinfogen: fix resource leak of FILE object

2018-02-02 Thread Neil Horman
On Fri, Feb 02, 2018 at 12:00:58PM +, Bruce Richardson wrote:
> Coverity flags an issue where the resources used by the FILE object for
> the temporary input file are leaked. This is a very minor issue, but is
> easily fixed, while also avoiding later problems where we try to close
> an invalid file descriptor in the failure case.
> 
> The fix is to use "dup()" to get a new file descriptor number rather than
> using the value directly from fileno. This allows us to close the file
> opened with tmpfile() within the scope block, while allowing the duplicate
> to pass to the outer block and be closed when the function terminates.
> 
> As a side-effect I/O in the function is therefore changed from using stdio
> fread/fwrite to read/write system calls.
> 
> Coverity issue: 260399
> Fixes: 0d68533617e3 ("pmdinfogen: allow using stdin and stdout")
> 
> Signed-off-by: Bruce Richardson 
> ---
>  buildtools/pmdinfogen/pmdinfogen.c | 16 ++--
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/buildtools/pmdinfogen/pmdinfogen.c 
> b/buildtools/pmdinfogen/pmdinfogen.c
> index 45b267346..0f35ca46b 100644
> --- a/buildtools/pmdinfogen/pmdinfogen.c
> +++ b/buildtools/pmdinfogen/pmdinfogen.c
> @@ -50,20 +50,24 @@ static void *grab_file(const char *filename, unsigned 
> long *size)
>   /* from stdin, use a temporary file to mmap */
>   FILE *infile;
>   char buffer[1024];
> - size_t n;
> + int n;
>  
>   infile = tmpfile();
>   if (infile == NULL) {
>   perror("tmpfile");
>   return NULL;
>   }
> - while (!feof(stdin)) {
> - n = fread(buffer, 1, sizeof(buffer), stdin);
> - if (fwrite(buffer, 1, n, infile) != n)
> + fd = dup(fileno(infile));
> + fclose(infile);
> + if (fd < 0)
> + return NULL;
> +
> + n = read(STDIN_FILENO, buffer, sizeof(buffer));
> + while (n > 0) {
> + if (write(fd, buffer, n) != n)
>   goto failed;
> + n = read(STDIN_FILENO, buffer, sizeof(buffer));
>   }
> - fflush(infile);
> - fd = fileno(infile);
>   }
>  
>   if (fstat(fd, &st))
> -- 
> 2.14.3
> 
> 

Wouldn't it be just as good, and easier to check fd for == -1 as a condition of
calling close?

like 
failed:
if (fd >= 0)
close(fd);




Re: [dpdk-dev] [PATCH v4 0/4] net/i40e: fix multiple driver support issue

2018-02-02 Thread Zhang, Helin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Wu, Jingjing
> Sent: Friday, February 2, 2018 8:42 PM
> To: Xing, Beilei; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 0/4] net/i40e: fix multiple driver support
> issue
> 
> 
> 
> > -Original Message-
> > From: Xing, Beilei
> > Sent: Friday, February 2, 2018 8:06 PM
> > To: dev@dpdk.org; Wu, Jingjing 
> > Subject: [PATCH v4 0/4] net/i40e: fix multiple driver support issue
> >
> > DPDK i40e PMD will modify some global registers during initialization
> > and post initialization, there'll be impact during use of 700 series
> > Ethernet Adapter with both Linux kernel and DPDK PMD.
> > This patchset adds log for global configuration and adds device args
> > to disable global configuration and change interrupt for PF.
> >
> > v4 changes:
> >  - Fix interrupt conflict when using multiple driver.
> >
> > v3 changes:
> >  - Reword commit log.
> >
> > v2 changes:
> >  - Add debug log when writing global registers
> >   - Add option to disable writing global registers
> >
> >
> > Beilei Xing (4):
> >   net/i40e: add warnings when writing global registers
> >   net/i40e: add debug logs when writing global registers
> >   net/i40e: fix multiple driver support issue
> >   net/i40e: fix interrupt conflict when using multi-driver
> >
> >  doc/guides/nics/i40e.rst  |  12 +
> >  drivers/net/i40e/i40e_ethdev.c| 477 ++-
> ---
> >  drivers/net/i40e/i40e_ethdev.h|  69 +-
> >  drivers/net/i40e/i40e_ethdev_vf.c |   4 +-
> >  drivers/net/i40e/i40e_fdir.c  |  40 ++--
> >  drivers/net/i40e/i40e_flow.c  |   9 +
> >  6 files changed, 487 insertions(+), 124 deletions(-)
> >
> > --
> > 2.5.5
> 
> Acked-by: Jingjing Wu 
Applied the series to dpdk-next-net-intel sub tree, thanks!

/Helin



Re: [dpdk-dev] [PATCH FIX-OPTION-1] mbuf: fix the logic of user mempool ops API

2018-02-02 Thread Olivier Matz
On Fri, Feb 02, 2018 at 01:33:01PM +0530, Hemant Agrawal wrote:
> From: Nipun Gupta 
> 
> The existing rte_eal_mbuf_default mempool ops can return the compile time
> default ops name if the user has not provided command line inputs for
> mempool ops name. It will break the logic of best mempool ops as it will
> never return platform hw mempool ops.
> 
> This patch introduces a new API to just return the user mempool ops only.
> 
> Fixes: 8b0f7f434132 ("mbuf: maintain user and compile time mempool ops name")
> 
> Signed-off-by: Nipun Gupta 

This option is fine for me. I think we may also consider deprecating
rte_eal_mbuf_default_mempool_ops(), as it is done in option 2.

Acked-by: Olivier Matz 


Re: [dpdk-dev] [PATCH FIX-OPTION-1] mbuf: fix the logic of user mempool ops API

2018-02-02 Thread Hemant Agrawal
Hi Olivier,

> On Fri, Feb 02, 2018 at 01:33:01PM +0530, Hemant Agrawal wrote:
> > From: Nipun Gupta 
> >
> > The existing rte_eal_mbuf_default mempool ops can return the compile
> > time default ops name if the user has not provided command line inputs
> > for mempool ops name. It will break the logic of best mempool ops as
> > it will never return platform hw mempool ops.
> >
> > This patch introduces a new API to just return the user mempool ops only.
> >
> > Fixes: 8b0f7f434132 ("mbuf: maintain user and compile time mempool ops
> > name")
> >
> > Signed-off-by: Nipun Gupta 
> 
> This option is fine for me. I think we may also consider deprecating
> rte_eal_mbuf_default_mempool_ops(), as it is done in option 2.

[Hemant]   Ok. Please also ack following. I will update patchwork for remaining 
patches accordingly.
[PATCH v2] doc: remove eal API for default mempool ops name

> Acked-by: Olivier Matz 

Thanks
Hemant


Re: [dpdk-dev] [PATCH v2] doc: remove eal API for default mempool ops name

2018-02-02 Thread Olivier Matz
On Fri, Feb 02, 2018 at 02:01:42PM +0530, Hemant Agrawal wrote:
> Signed-off-by: Hemant Agrawal 
> ---
> v2: fix checkpatch errors
> 
>  doc/guides/rel_notes/deprecation.rst | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/doc/guides/rel_notes/deprecation.rst 
> b/doc/guides/rel_notes/deprecation.rst
> index d59ad59..c7d8f25 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -8,6 +8,15 @@ API and ABI deprecation notices are to be posted here.
>  Deprecation Notices
>  ---
>  
> +* eal: a new set of mbuf mempool ops name APIs for user, platform and best
> +  mempool names have been defined in ``rte_mbuf`` in v18.02. The uses of
> +  ``rte_eal_mbuf_default_mempool_ops`` shall be replaced by
> +  ``rte_mbuf_best_mempool_ops``.
> +  The following function is now redundant and it is targeted to be deprecated 
> in
> +  18.05:
> +
> +  - ``rte_eal_mbuf_default_mempool_ops``
> +
>  * eal: several API and ABI changes are planned for ``rte_devargs`` in v18.02.
>The format of device command line parameters will change. The bus will need
>to be explicitly stated in the device declaration. The enum ``rte_devtype``

Acked-by: Olivier Matz 


[dpdk-dev] [PATCH v3] eal: add error check for core options

2018-02-02 Thread Marko Kovacevic
Error information on current core usage list, mask or map
were incomplete. Added states to differentiate core usage
and to inform user.

Signed-off-by: Marko Kovacevic 
Reviewed-by: Anatoly Burakov 

---

V3:
 - Changed to reflect the coding guidelines - Bruce
 - update the documentation for better clarity - Bruce
 - Added back the reviewer information - Anatoly

V2:
 - Cleaned up the logging for error cases - Anatoly
---
 doc/guides/testpmd_app_ug/run_app.rst  |  4 
 lib/librte_eal/common/eal_common_options.c | 36 +++---
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/doc/guides/testpmd_app_ug/run_app.rst 
b/doc/guides/testpmd_app_ug/run_app.rst
index 46da1df..85e725f 100644
--- a/doc/guides/testpmd_app_ug/run_app.rst
+++ b/doc/guides/testpmd_app_ug/run_app.rst
@@ -62,6 +62,10 @@ See the DPDK Getting Started Guides for more information on 
these options.
 The grouping ``()`` can be omitted for single element group.
 The ``@`` can be omitted if cpus and lcores have the same value.
 
+.. Note::
+At a given instance only one core option ``--lcores``, ``-l`` or ``-c`` 
can be used.
+
+
 *   ``--master-lcore ID``
 
 Core ID that is used as master.
diff --git a/lib/librte_eal/common/eal_common_options.c 
b/lib/librte_eal/common/eal_common_options.c
index b6d2762..66f0868 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -57,6 +57,9 @@
 #include "eal_filesystem.h"
 
 #define BITS_PER_HEX 4
+#define LCORE_OPT_LST 1
+#define LCORE_OPT_MSK 2
+#define LCORE_OPT_MAP 3
 
 const char
 eal_short_options[] =
@@ -1028,7 +1031,16 @@ eal_parse_common_option(int opt, const char *optarg,
RTE_LOG(ERR, EAL, "invalid coremask\n");
return -1;
}
-   core_parsed = 1;
+
+   if (core_parsed) {
+   RTE_LOG(ERR, EAL, "Option -c is ignored, because (%s) 
is set!\n",
+   (core_parsed == LCORE_OPT_LST) ? "-l" :
+   (core_parsed == LCORE_OPT_MAP) ? "--lcore" :
+   "-c");
+   return -1;
+   }
+
+   core_parsed = LCORE_OPT_MSK;
break;
/* corelist */
case 'l':
@@ -1036,7 +1048,16 @@ eal_parse_common_option(int opt, const char *optarg,
RTE_LOG(ERR, EAL, "invalid core list\n");
return -1;
}
-   core_parsed = 1;
+
+   if (core_parsed) {
+   RTE_LOG(ERR, EAL, "Option -l is ignored, because (%s) 
is set!\n",
+   (core_parsed == LCORE_OPT_MSK) ? "-c" :
+   (core_parsed == LCORE_OPT_MAP) ? "--lcore" :
+   "-l");
+   return -1;
+   }
+
+   core_parsed = LCORE_OPT_LST;
break;
/* service coremask */
case 's':
@@ -1156,7 +1177,16 @@ eal_parse_common_option(int opt, const char *optarg,
OPT_LCORES "\n");
return -1;
}
-   core_parsed = 1;
+
+   if (core_parsed) {
+   RTE_LOG(ERR, EAL, "Option --lcore is ignored, because 
(%s) is set!\n",
+   (core_parsed == LCORE_OPT_LST) ? "-l" :
+   (core_parsed == LCORE_OPT_MSK) ? "-c" :
+   "--lcore");
+   return -1;
+   }
+
+   core_parsed = LCORE_OPT_MAP;
break;
 
/* don't know what to do, leave this to caller */
-- 
2.9.5



[dpdk-dev] [PATCH v3] doc: update definition of lcore id and lcore index

2018-02-02 Thread Marko Kovacevic
Added examples for the lcore index to better explain it in
various cases, and cited examples for the lcore id.

Signed-off-by: Marko Kovacevic 

---
V3:
 - Rephrased examples for lcore index and id - Bruce

V2:
 - Added clearer description to lcore id - Bruce
 - Reframed examples for lcore index - Bruce
---
 lib/librte_eal/common/include/rte_lcore.h | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_lcore.h 
b/lib/librte_eal/common/include/rte_lcore.h
index d84bcff..0472220 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -57,7 +57,14 @@ RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread 
"lcore id". */
 RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
 
 /**
- * Return the ID of the execution unit we are running on.
+ * Return the Application thread ID of the execution unit.
+ *
+ * Note: in most cases the lcore id returned here will also correspond
+ *   to the processor id of the CPU on which the thread is pinned, this
+ *   will not be the case if the user has explicitly changed the thread to
+ *   core affinities using --lcores EAL argument e.g. --lcores '(0-3)@10'
+ *   to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10.
+ *
  * @return
  *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
  */
@@ -94,7 +101,12 @@ rte_lcore_count(void)
 
 /**
  * Return the index of the lcore starting from zero.
- * The order is physical or given by command line (-l option).
+ *
+ * When option -c or -l is given, the index corresponds
+ * to the order in the list.
+ * For example:
+ * -c 0x30, lcore 4 has index 0, and 5 has index 1.
+ * -l 22,18 lcore 22 has index 0, and 18 has index 1.
  *
  * @param lcore_id
  *   The targeted lcore, or -1 for the current one.
-- 
2.9.5



Re: [dpdk-dev] FW: [RFC v1 1/1] lib/cryptodev: add support of asymmetric crypto

2018-02-02 Thread Jain, Deepak K
HI Shally,

> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Verma, Shally
> Subject: [dpdk-dev] FW: [RFC v1 1/1] lib/cryptodev: add support of
> asymmetric crypto
> 
> Hi Pablo/Fiona
> 
> Could you please provide your input on this RFC. Your feedback is awaited.


Many thanks for  sending the RFC. We are looking into this and will provide 
feedback by end of business next week.

> 
> Thanks
> Shally
> 
> -Original Message-
> From: Verma, Shally
> Sent: 23 January 2018 15:24
> To: declan.dohe...@intel.com
> Cc: dev@dpdk.org; Athreya, Narayana Prasad
> ; Murthy, Nidadavolu
> ; Sahu, Sunila
> ; Gupta, Ashish ;
> Verma, Shally 
> Subject: [RFC v1 1/1] lib/cryptodev: add support of asymmetric crypto
> 
> From: Shally Verma 
> 
> Add support for asymmetric crypto operations in DPDK lib cryptodev
> 
> Key feature include:
> - Only session based asymmetric crypto operations
> - new get and set APIs for symmetric and asymmetric session private
>   data and other information
> - APIs to create, configure and attach queue pair to asymmetric sessions
> - new capabilities in struct device_info to indicate
>   -- number of dedicated queue pairs available for symmetric and
>  asymmetric operations, if any
>   -- number of asymmetric sessions possible per qp
> 
> Proposed asymmetric cryptographic operations are:
> - rsa
> - dsa
> - Diffie-Hellman key pair generation and shared key computation
> - EC Diffie-Hellman
> - fundamental elliptic curve operations
> - elliptic curve DSA
> - modular exponentiation and inversion
> 
> This patch primarily defines PMD operations and device capabilities
> to perform asymmetric crypto ops on queue pairs and intends to
> invite feedback on the current proposal so as to ensure it encompasses
> all kinds of crypto devices with different capabilities and queue
> pair management.
> 
> List of TBDs:
> - Currently, patch only updated for RSA xform and associated params.
>   Other algorithms to be added in subsequent versions.
> - per-service stats update
> 
> Signed-off-by: Shally Verma 
> ---
> 
> It is derivative of RFC v2 asymmetric crypto patch series initiated by
> Umesh Kartha(mailto:umesh.kar...@caviumnetworks.com):
> 
>  http://dpdk.org/dev/patchwork/patch/24245/
>  http://dpdk.org/dev/patchwork/patch/24246/
>  http://dpdk.org/dev/patchwork/patch/24247/
> 
> And inclusive of all review comments given on RFC v2.
>  ( See complete discussion thread here:
> http://dev.dpdk.narkive.com/yqTFFLHw/dpdk-dev-rfc-specifications-for-
> asymmetric-crypto-algorithms#post12)
> 
> Some of the RFCv2 Review comments pending for closure:
> > " [Fiona] The count fn isn't used at all for sym - probably no need to add 
> > for
> asym
>  better instead to remove the sym fn."
> 
>  It is still present in dpdk-next-crypto for sym, so what has been 
> decision
>  on it?
> 
> >"[Fiona] if each qp can handle only a specific service, i.e. a subset of the
> capabilities
> Indicated by the device capability list, there's a need for a new API to 
> query
> the capability of a qp."
> 
> Current proposal doesn't distinguish between device capability and qp
> capability.
> It rather leaves the handling of such differences internal to PMDs. Thus no
> capability
> or API added for qp in current version. It is subject to revisit based on
> review
> feedback on current proposal.
> 
> - Sessionless Support.
> Current proposal only support Session-based because:
>  1. All one-time setup i.e.  algos and associated params, such as, public-
> private keys
> or modulus length can be done in control path using session-init API
>  2. it's an easier way to dedicate qp to do specific service (using
> queue_pair_attach())
> which cannot be case in sessionless
>  3. Couldn't find any significant advantage going sessionless way. Also
> most of the existing PMDs are session-based.
> 
> It could be added in subsequent versions, if requirement is identified,
> based on review comment
> on this RFC.
> 
> Summary
> ---
> 
> This section provides an overview of key feature enabled in current
> specification.
> It comprise of key design challenges as have been identified on RFCv2 and
> summary description of new interfaces and definitions added to handle
> same.
> 
> Description
> ---
> 
> This API set assumes that the max_nb_queue_pairs on a
> device can be allocated to any mix of sym or asym. Some devices
> may have a fixed max per service. Thus, rte_cryptodev_info
> is updated with max_sym_nb_queues and max_asym_nb_queues with rule:
> 
> max_nb_queue_pair = max_nb_sym_qp + max_nb_asym_qp.
> 
> If device has no restrictions on qp to be used per service, such PMDs can
> leave
> max_nb_sym_qp = max_nb_asym_qp = 0. In such case, application can
> setup any of
> the service upto limit defined by max_nb_queue_pair.
> 
> Here, max_nb_sym_qp and max_nb_asym_qp, if non-zero, just define limit
> on qp which are
> available for each ser

[dpdk-dev] [PATCH v1 2/4] net/mlx: fix missing includes for rdma-core glue

2018-02-02 Thread Adrien Mazarguil
For consistency since these includes are already pulled by others.

Fixes: 6aca97d310 ("net/mlx4: move rdma-core calls to separate file")
Fixes: 7202118686 ("net/mlx5: move rdma-core calls to separate file")

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx4/mlx4_glue.c | 3 +++
 drivers/net/mlx4/mlx4_glue.h | 3 +++
 drivers/net/mlx5/mlx5_glue.h | 1 +
 3 files changed, 7 insertions(+)

diff --git a/drivers/net/mlx4/mlx4_glue.c b/drivers/net/mlx4/mlx4_glue.c
index 30797bd2b..47ae7ad0f 100644
--- a/drivers/net/mlx4/mlx4_glue.c
+++ b/drivers/net/mlx4/mlx4_glue.c
@@ -3,6 +3,9 @@
  * Copyright 2018 Mellanox
  */
 
+#include 
+#include 
+
 /* Verbs headers do not support -pedantic. */
 #ifdef PEDANTIC
 #pragma GCC diagnostic ignored "-Wpedantic"
diff --git a/drivers/net/mlx4/mlx4_glue.h b/drivers/net/mlx4/mlx4_glue.h
index 0623511f2..de251c622 100644
--- a/drivers/net/mlx4/mlx4_glue.h
+++ b/drivers/net/mlx4/mlx4_glue.h
@@ -6,6 +6,9 @@
 #ifndef MLX4_GLUE_H_
 #define MLX4_GLUE_H_
 
+#include 
+#include 
+
 /* Verbs headers do not support -pedantic. */
 #ifdef PEDANTIC
 #pragma GCC diagnostic ignored "-Wpedantic"
diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h
index 6afb629ff..7fed302ba 100644
--- a/drivers/net/mlx5/mlx5_glue.h
+++ b/drivers/net/mlx5/mlx5_glue.h
@@ -6,6 +6,7 @@
 #ifndef MLX5_GLUE_H_
 #define MLX5_GLUE_H_
 
+#include 
 #include 
 
 /* Verbs headers do not support -pedantic. */
-- 
2.11.0


[dpdk-dev] [PATCH v1 0/4] net/mlx: enhance rdma-core glue configuration

2018-02-02 Thread Adrien Mazarguil
The decision to deliver mlx4/mlx5 rdma-core glue plug-ins separately instead
of generating them at run time due to security concerns [1] led to a few
issues:

- They must be present on the file system before running DPDK.
- Their location must be known to the dynamic linker.
- Their names overlap and ABI compatibility is not guaranteed, which may
  lead to crashes.

This series addresses the above by adding version information to plug-ins
and taking CONFIG_RTE_EAL_PMD_PATH into account to locate them on the file
system.

[1] http://dpdk.org/ml/archives/dev/2018-January/089617.html

Adrien Mazarguil (4):
  net/mlx: add debug checks to glue structure
  net/mlx: fix missing includes for rdma-core glue
  net/mlx: version rdma-core glue libraries
  net/mlx: make rdma-core glue path configurable

 doc/guides/nics/mlx4.rst | 17 
 doc/guides/nics/mlx5.rst | 14 ++
 drivers/net/mlx4/Makefile|  8 --
 drivers/net/mlx4/mlx4.c  | 57 ++-
 drivers/net/mlx4/mlx4_glue.c |  4 +++
 drivers/net/mlx4/mlx4_glue.h |  9 +++
 drivers/net/mlx5/Makefile|  8 --
 drivers/net/mlx5/mlx5.c  | 57 ++-
 drivers/net/mlx5/mlx5_glue.c |  1 +
 drivers/net/mlx5/mlx5_glue.h |  7 +
 10 files changed, 176 insertions(+), 6 deletions(-)

-- 
2.11.0


[dpdk-dev] [PATCH v1 1/4] net/mlx: add debug checks to glue structure

2018-02-02 Thread Adrien Mazarguil
This code should catch mistakes early if a glue structure member is added
without a corresponding implementation in the library.

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx4/mlx4.c | 9 +
 drivers/net/mlx5/mlx5.c | 9 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 50a55ee52..201d39b6e 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -799,6 +799,15 @@ rte_mlx4_pmd_init(void)
return;
assert(mlx4_glue);
 #endif
+#ifndef NDEBUG
+   /* Glue structure must not contain any NULL pointers. */
+   {
+   unsigned int i;
+
+   for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i)
+   assert(((const void *const *)mlx4_glue)[i]);
+   }
+#endif
mlx4_glue->fork_init();
rte_pci_register(&mlx4_driver);
 }
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 544599b01..050cfac0d 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1142,6 +1142,15 @@ rte_mlx5_pmd_init(void)
return;
assert(mlx5_glue);
 #endif
+#ifndef NDEBUG
+   /* Glue structure must not contain any NULL pointers. */
+   {
+   unsigned int i;
+
+   for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
+   assert(((const void *const *)mlx5_glue)[i]);
+   }
+#endif
mlx5_glue->fork_init();
rte_pci_register(&mlx5_driver);
 }
-- 
2.11.0


[dpdk-dev] [PATCH v1 3/4] net/mlx: version rdma-core glue libraries

2018-02-02 Thread Adrien Mazarguil
When built as separate objects, these libraries do not have unique names.
Since they do not maintain a stable ABI, loading an incompatible library
may result in a crash (e.g. in case multiple versions are installed).

This patch addresses the above by versioning glue libraries, both on the
file system (version suffix) and by comparing a dedicated version field
member in glue structures.

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx4/Makefile| 8 ++--
 drivers/net/mlx4/mlx4.c  | 5 +
 drivers/net/mlx4/mlx4_glue.c | 1 +
 drivers/net/mlx4/mlx4_glue.h | 6 ++
 drivers/net/mlx5/Makefile| 8 ++--
 drivers/net/mlx5/mlx5.c  | 5 +
 drivers/net/mlx5/mlx5_glue.c | 1 +
 drivers/net/mlx5/mlx5_glue.h | 6 ++
 8 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index c004ac71c..cc9db9977 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -33,7 +33,9 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 # Library name.
 LIB = librte_pmd_mlx4.a
-LIB_GLUE = librte_pmd_mlx4_glue.so
+LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
+LIB_GLUE_BASE = librte_pmd_mlx4_glue.so
+LIB_GLUE_VERSION = 18.02.1
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
@@ -64,6 +66,7 @@ CFLAGS += -D_XOPEN_SOURCE=600
 CFLAGS += $(WERROR_FLAGS)
 ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y)
 CFLAGS += -DMLX4_GLUE='"$(LIB_GLUE)"'
+CFLAGS += -DMLX4_GLUE_VERSION='"$(LIB_GLUE_VERSION)"'
 CFLAGS_mlx4_glue.o += -fPIC
 LDLIBS += -ldl
 else
@@ -131,6 +134,7 @@ $(LIB): $(LIB_GLUE)
 
 $(LIB_GLUE): mlx4_glue.o
$Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \
+   -Wl,-h,$(LIB_GLUE) \
-s -shared -o $@ $< -libverbs -lmlx4
 
 mlx4_glue.o: mlx4_autoconf.h
@@ -139,6 +143,6 @@ endif
 
 clean_mlx4: FORCE
$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
-   $Q rm -f -- mlx4_glue.o $(LIB_GLUE)
+   $Q rm -f -- mlx4_glue.o $(LIB_GLUE_BASE)*
 
 clean: clean_mlx4
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 201d39b6e..61a852fb9 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -808,6 +808,11 @@ rte_mlx4_pmd_init(void)
assert(((const void *const *)mlx4_glue)[i]);
}
 #endif
+   if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) {
+   ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required",
+ mlx4_glue->version, MLX4_GLUE_VERSION);
+   return;
+   }
mlx4_glue->fork_init();
rte_pci_register(&mlx4_driver);
 }
diff --git a/drivers/net/mlx4/mlx4_glue.c b/drivers/net/mlx4/mlx4_glue.c
index 47ae7ad0f..3b79d320e 100644
--- a/drivers/net/mlx4/mlx4_glue.c
+++ b/drivers/net/mlx4/mlx4_glue.c
@@ -240,6 +240,7 @@ mlx4_glue_dv_set_context_attr(struct ibv_context *context,
 }
 
 const struct mlx4_glue *mlx4_glue = &(const struct mlx4_glue){
+   .version = MLX4_GLUE_VERSION,
.fork_init = mlx4_glue_fork_init,
.get_async_event = mlx4_glue_get_async_event,
.ack_async_event = mlx4_glue_ack_async_event,
diff --git a/drivers/net/mlx4/mlx4_glue.h b/drivers/net/mlx4/mlx4_glue.h
index de251c622..368f906bf 100644
--- a/drivers/net/mlx4/mlx4_glue.h
+++ b/drivers/net/mlx4/mlx4_glue.h
@@ -19,7 +19,13 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#ifndef MLX4_GLUE_VERSION
+#define MLX4_GLUE_VERSION ""
+#endif
+
+/* LIB_GLUE_VERSION must be updated every time this structure is modified. */
 struct mlx4_glue {
+   const char *version;
int (*fork_init)(void);
int (*get_async_event)(struct ibv_context *context,
   struct ibv_async_event *event);
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 4b20d718b..4086f2039 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -33,7 +33,9 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 # Library name.
 LIB = librte_pmd_mlx5.a
-LIB_GLUE = librte_pmd_mlx5_glue.so
+LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
+LIB_GLUE_BASE = librte_pmd_mlx5_glue.so
+LIB_GLUE_VERSION = 18.02.1
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
@@ -74,6 +76,7 @@ CFLAGS += $(WERROR_FLAGS)
 CFLAGS += -Wno-strict-prototypes
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 CFLAGS += -DMLX5_GLUE='"$(LIB_GLUE)"'
+CFLAGS += -DMLX5_GLUE_VERSION='"$(LIB_GLUE_VERSION)"'
 CFLAGS_mlx5_glue.o += -fPIC
 LDLIBS += -ldl
 else
@@ -180,6 +183,7 @@ $(LIB): $(LIB_GLUE)
 
 $(LIB_GLUE): mlx5_glue.o
$Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \
+   -Wl,-h,$(LIB_GLUE) \
-s -shared -o $@ $< -libverbs -lmlx5
 
 mlx5_glue.o: mlx5_autoconf.h
@@ -188,6 +192,6 @@ endif
 
 clean_mlx5: FORCE
$Q rm -f -- mlx5_autoconf.h mlx5_autoconf.h.new
-   $Q rm -f -- mlx5_glue.o $(LIB_GLUE)
+   $Q rm -f -- mlx5_glue.o $(LIB_GLUE_BASE)*
 
 clean: clean_mlx5
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 050cfac0d..341230d2b 1

[dpdk-dev] [PATCH v1 4/4] net/mlx: make rdma-core glue path configurable

2018-02-02 Thread Adrien Mazarguil
Since rdma-core glue libraries are intrinsically tied to their respective
PMDs and used as internal plug-ins, their presence in the default search
path among other system libraries for the dynamic linker is not necessarily
desired.

This commit enables their installation and subsequent look-up at run time
in RTE_EAL_PMD_PATH if configured to a nonempty string. This path can also
be overridden by environment variables MLX[45]_GLUE_PATH.

Signed-off-by: Adrien Mazarguil 
---
 doc/guides/nics/mlx4.rst | 17 +
 doc/guides/nics/mlx5.rst | 14 ++
 drivers/net/mlx4/mlx4.c  | 43 ++-
 drivers/net/mlx5/mlx5.c  | 43 ++-
 4 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 88161781c..9e4fbf692 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -97,6 +97,11 @@ These options can be modified in the ``.config`` file.
   ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as
   missing with ``ldd(1)``.
 
+  It works by moving these dependencies to a purpose-built rdma-core "glue"
+  plug-in, which must either be installed in ``CONFIG_RTE_EAL_PMD_PATH`` if
+  set, or in a standard location for the dynamic linker (e.g. ``/lib``) if
+  left to the default empty string (``""``).
+
   This option has no performance impact.
 
 - ``CONFIG_RTE_LIBRTE_MLX4_DEBUG`` (default **n**)
@@ -113,6 +118,18 @@ These options can be modified in the ``.config`` file.
 
   This value is always 1 for RX queues since they use a single MP.
 
+Environment variables
+~~~~~~~~~~~~~~~~~~~~~
+
+- ``MLX4_GLUE_PATH``
+
+  A list of directories in which to search for the rdma-core "glue" plug-in,
+  separated by colons or semi-colons.
+
+  Only matters when compiled with ``CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS``
+  enabled and most useful when ``CONFIG_RTE_EAL_PMD_PATH`` is also set,
+  since ``LD_LIBRARY_PATH`` has no effect in this case.
+
 Run-time configuration
 ~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a9e4bf51a..1635dff2b 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -170,6 +170,11 @@ These options can be modified in the ``.config`` file.
   ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as
   missing with ``ldd(1)``.
 
+  It works by moving these dependencies to a purpose-built rdma-core "glue"
+  plug-in, which must either be installed in ``CONFIG_RTE_EAL_PMD_PATH`` if
+  set, or in a standard location for the dynamic linker (e.g. ``/lib``) if
+  left to the default empty string (``""``).
+
   This option has no performance impact.
 
 - ``CONFIG_RTE_LIBRTE_MLX5_DEBUG`` (default **n**)
@@ -189,6 +194,15 @@ These options can be modified in the ``.config`` file.
 Environment variables
 ~~~~~~~~~~~~~~~~~~~~~
 
+- ``MLX5_GLUE_PATH``
+
+  A list of directories in which to search for the rdma-core "glue" plug-in,
+  separated by colons or semi-colons.
+
+  Only matters when compiled with ``CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS``
+  enabled and most useful when ``CONFIG_RTE_EAL_PMD_PATH`` is also set,
+  since ``LD_LIBRARY_PATH`` has no effect in this case.
+
 - ``MLX5_PMD_ENABLE_PADDING``
 
   Enables HW packet padding in PCI bus transactions.
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 61a852fb9..4266cb1bb 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -741,11 +741,52 @@ static struct rte_pci_driver mlx4_driver = {
 static int
 mlx4_glue_init(void)
 {
+   const char *path[] = {
+   /*
+* A basic security check is necessary before trusting
+* MLX4_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
+*/
+   (geteuid() == getuid() && getegid() == getgid() ?
+getenv("MLX4_GLUE_PATH") : NULL),
+   RTE_EAL_PMD_PATH,
+   };
+   unsigned int i = 0;
void *handle = NULL;
void **sym;
const char *dlmsg;
 
-   handle = dlopen(MLX4_GLUE, RTLD_LAZY);
+   while (!handle && i != RTE_DIM(path)) {
+   const char *end;
+   size_t len;
+   int ret;
+
+   if (!path[i]) {
+   ++i;
+   continue;
+   }
+   end = strpbrk(path[i], ":;");
+   if (!end)
+   end = path[i] + strlen(path[i]);
+   len = end - path[i];
+   ret = 0;
+   do {
+   char name[ret + 1];
+
+   ret = snprintf(name, ret, "%.*s%s" MLX4_GLUE "\n",
+  (int)len, path[i],
+  (!len || *(end - 1) == '/') ? "" : "/");
+   if (ret == -1)
+   break;
+   if (sizeof(name) != (size_t)ret + 1

Re: [dpdk-dev] vhost: unlink existing file for server mode

2018-02-02 Thread Ilya Maximets
Oh.

It's such a game: twice a year someone sends this patch to the mailing list.

I have another one for you:
* Find all the patches equal to this one in archives.
* Read all the discussions.
* Come back if you have some new ideas, not already discussed many times here.

Sorry for my sarcasm.
NACK for this, as usual.

Best regards, Ilya Maximets.

On 02.02.2018 11:39, Zhiyong Yang wrote:
> Vhost-user startup will fail based on server mode, if the specified
> socket file has already existed. The patch introduces function
> unlink() to remove the possible existing file.
> 
> Cc: y...@fridaylinux.org
> Cc: maxime.coque...@redhat.com
> 
> Signed-off-by: Zhiyong Yang 
> ---
>  lib/librte_vhost/socket.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
> index 6e3857e7a..324a24f4e 100644
> --- a/lib/librte_vhost/socket.c
> +++ b/lib/librte_vhost/socket.c
> @@ -315,6 +315,7 @@ vhost_user_start_server(struct vhost_user_socket *vsocket)
>   int fd = vsocket->socket_fd;
>   const char *path = vsocket->path;
>  
> + unlink(path);
>   ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
>   if (ret < 0) {
>   RTE_LOG(ERR, VHOST_CONFIG,
> 


Re: [dpdk-dev] [PATCH v3] eal: add error check for core options

2018-02-02 Thread Bruce Richardson
On Fri, Feb 02, 2018 at 02:51:28PM +, Marko Kovacevic wrote:
> Error information on current core usage list, mask or map
> were incomplete. Added states to differentiate core usage
> and to inform user.
> 
> Signed-off-by: Marko Kovacevic 
> Reviewed-by: Anatoly Burakov 
Acked-by: Bruce Richardson 

This is fine as-is - one comment below for future consideration.
> 
> ---
> 
> V3:
>  - Changed to reflect the coding guidelines - Bruce
>  - update the documentation for better clarity - Bruce
>  - Added back the reviewer information - Anatoly
> 
> V2:
>  - Cleaned up the logging for error cases - Anatoly
> ---
>  doc/guides/testpmd_app_ug/run_app.rst  |  4 
>  lib/librte_eal/common/eal_common_options.c | 36 
> +++---
>  2 files changed, 37 insertions(+), 3 deletions(-)
> 
> diff --git a/doc/guides/testpmd_app_ug/run_app.rst 
> b/doc/guides/testpmd_app_ug/run_app.rst
> index 46da1df..85e725f 100644
> --- a/doc/guides/testpmd_app_ug/run_app.rst
> +++ b/doc/guides/testpmd_app_ug/run_app.rst
> @@ -62,6 +62,10 @@ See the DPDK Getting Started Guides for more information 
> on these options.
>  The grouping ``()`` can be omitted for single element group.
>  The ``@`` can be omitted if cpus and lcores have the same value.
>  
> +.. Note::
> +At a given instance only one core option ``--lcores``, ``-l`` or ``-c`` 
> can be used.
> +
> +
>  *   ``--master-lcore ID``
>  
>  Core ID that is used as master.
> diff --git a/lib/librte_eal/common/eal_common_options.c 
> b/lib/librte_eal/common/eal_common_options.c
> index b6d2762..66f0868 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -57,6 +57,9 @@
>  #include "eal_filesystem.h"
>  
>  #define BITS_PER_HEX 4
> +#define LCORE_OPT_LST 1
> +#define LCORE_OPT_MSK 2
> +#define LCORE_OPT_MAP 3
>  
>  const char
>  eal_short_options[] =
> @@ -1028,7 +1031,16 @@ eal_parse_common_option(int opt, const char *optarg,
>   RTE_LOG(ERR, EAL, "invalid coremask\n");
>   return -1;
>   }
> - core_parsed = 1;
> +
> + if (core_parsed) {
> + RTE_LOG(ERR, EAL, "Option -c is ignored, because (%s) 
> is set!\n",
> + (core_parsed == LCORE_OPT_LST) ? "-l" :
> + (core_parsed == LCORE_OPT_MAP) ? "--lcore" :
> + "-c");

This block is repeated in slightly different forms 3 times. It should
probably be replaced using a function or macro to return the appropriate
string based on core_parsed value.

Thanks,
/Bruce


Re: [dpdk-dev] vhost: unlink existing file for server mode

2018-02-02 Thread Richardson, Bruce


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Ilya Maximets
> Sent: Friday, February 2, 2018 3:30 PM
> To: Yang, Zhiyong ; dev@dpdk.org
> Cc: y...@fridaylinux.org; maxime.coque...@redhat.com
> Subject: Re: [dpdk-dev] vhost: unlink existing file for server mode
> 
> Oh.
> 
> It's such a game: twice a year someone sends this patch to mail list.
> 
> I have another one for you:
> * Find all the patches equal to this one in archives.
> * Read all the discussions.
> * Come back if you have some new ideas, not already discussed many times
> here.
> 
> Sorry for my sarcasm.
> NACK for this, as usual.
> 
> Best regards, Ilya Maximets.

Surely the fact of repeated patches is an indication that this should be
explicitly called out at appropriate places in the code via comments.
Far easier to provide people the info in the code they are changing than
expecting them to trawl through historical mailing list entries.

Regards,
/Bruce

> 
> On 02.02.2018 11:39, Zhiyong Yang wrote:
> > Vhost-user startup will fail based on server mode, if the specified
> > socket file has already existed. The patch introduces function
> > unlink() to remove the possible existing file.
> >
> > Cc: y...@fridaylinux.org
> > Cc: maxime.coque...@redhat.com
> >
> > Signed-off-by: Zhiyong Yang 
> > ---
> >  lib/librte_vhost/socket.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
> > index 6e3857e7a..324a24f4e 100644
> > --- a/lib/librte_vhost/socket.c
> > +++ b/lib/librte_vhost/socket.c
> > @@ -315,6 +315,7 @@ vhost_user_start_server(struct vhost_user_socket
> *vsocket)
> > int fd = vsocket->socket_fd;
> > const char *path = vsocket->path;
> >
> > +   unlink(path);
> > ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket-
> >un));
> > if (ret < 0) {
> > RTE_LOG(ERR, VHOST_CONFIG,
> >


Re: [dpdk-dev] vhost: unlink existing file for server mode

2018-02-02 Thread Ilya Maximets
On 02.02.2018 18:38, Richardson, Bruce wrote:
> 
> 
>> -Original Message-
>> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Ilya Maximets
>> Sent: Friday, February 2, 2018 3:30 PM
>> To: Yang, Zhiyong ; dev@dpdk.org
>> Cc: y...@fridaylinux.org; maxime.coque...@redhat.com
>> Subject: Re: [dpdk-dev] vhost: unlink existing file for server mode
>>
>> Oh.
>>
>> It's such a game: twice a year someone sends this patch to mail list.
>>
>> I have another one for you:
>> * Find all the patches equal to this one in archives.
>> * Read all the discussions.
>> * Come back if you have some new ideas, not already discussed many times
>> here.
>>
>> Sorry for my sarcasm.
>> NACK for this, as usual.
>>
>> Best regards, Ilya Maximets.
> 
> Surely the fact of repeated patches is an indication that this should be
> explicitly called out at appropriately places in the code via comments.
> Far easier to provide people the info in the code they are changing than
> expecting them to trawl through historical mailing list entries.
> 
> Regards,
> /Bruce
> 

Yes, you're right. We just discussed the situation locally in the office
and came to exactly the same conclusion. We definitely need the comment
here to prevent future unlink-related patches.

>>
>> On 02.02.2018 11:39, Zhiyong Yang wrote:
>>> Vhost-user startup will fail based on server mode, if the specified
>>> socket file has already existed. The patch introduces function
>>> unlink() to remove the possible existing file.
>>>
>>> Cc: y...@fridaylinux.org
>>> Cc: maxime.coque...@redhat.com
>>>
>>> Signed-off-by: Zhiyong Yang 
>>> ---
>>>  lib/librte_vhost/socket.c | 1 +
>>>  1 file changed, 1 insertion(+)
>>>
>>> diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
>>> index 6e3857e7a..324a24f4e 100644
>>> --- a/lib/librte_vhost/socket.c
>>> +++ b/lib/librte_vhost/socket.c
>>> @@ -315,6 +315,7 @@ vhost_user_start_server(struct vhost_user_socket
>> *vsocket)
>>> int fd = vsocket->socket_fd;
>>> const char *path = vsocket->path;
>>>
>>> +   unlink(path);
>>> ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket-
>>> un));
>>> if (ret < 0) {
>>> RTE_LOG(ERR, VHOST_CONFIG,
>>>


Re: [dpdk-dev] [PATCH] pmdinfogen: fix resource leak of FILE object

2018-02-02 Thread Bruce Richardson
On Fri, Feb 02, 2018 at 07:44:39AM -0500, Neil Horman wrote:
> On Fri, Feb 02, 2018 at 12:00:58PM +, Bruce Richardson wrote:
> > Coverity flags an issue where the resources used by the FILE object for
> > the temporary input file are leaked. This is a very minor issue, but is
> > easily fixed, while also avoiding later problems where we try to close
> > an invalid file descriptor in the failure case.
> > 
> > The fix is to use "dup()" to get a new file descriptor number rather than
> > using the value directly from fileno. This allows us to close the file
> > opened with tmpfile() within in scope block, while allowing the duplicate
> > to pass to the outer block and be closed when the function terminates.
> > 
> > As a side-effect I/O in the function is therefore changed from using stdio
> > fread/fwrite to read/write system calls.
> > 
> > Coverity issue: 260399
> > Fixes: 0d68533617e3 ("pmdinfogen: allow using stdin and stdout")
> > 
> > Signed-off-by: Bruce Richardson 
> > ---
> >  buildtools/pmdinfogen/pmdinfogen.c | 16 ++--
> >  1 file changed, 10 insertions(+), 6 deletions(-)
> > 
> > diff --git a/buildtools/pmdinfogen/pmdinfogen.c 
> > b/buildtools/pmdinfogen/pmdinfogen.c
> > index 45b267346..0f35ca46b 100644
> > --- a/buildtools/pmdinfogen/pmdinfogen.c
> > +++ b/buildtools/pmdinfogen/pmdinfogen.c
> > @@ -50,20 +50,24 @@ static void *grab_file(const char *filename, unsigned 
> > long *size)
> > /* from stdin, use a temporary file to mmap */
> > FILE *infile;
> > char buffer[1024];
> > -   size_t n;
> > +   int n;
> >  
> > infile = tmpfile();
> > if (infile == NULL) {
> > perror("tmpfile");
> > return NULL;
> > }
> > -   while (!feof(stdin)) {
> > -   n = fread(buffer, 1, sizeof(buffer), stdin);
> > -   if (fwrite(buffer, 1, n, infile) != n)
> > +   fd = dup(fileno(infile));
> > +   fclose(infile);
> > +   if (fd < 0)
> > +   return NULL;
> > +
> > +   n = read(STDIN_FILENO, buffer, sizeof(buffer));
> > +   while (n > 0) {
> > +   if (write(fd, buffer, n) != n)
> > goto failed;
> > +   n = read(STDIN_FILENO, buffer, sizeof(buffer));
> > }
> > -   fflush(infile);
> > -   fd = fileno(infile);
> > }
> >  
> > if (fstat(fd, &st))
> > -- 
> > 2.14.3
> > 
> > 
> 
> Wouldn't it be just as good, and easier to check fd for == -1 as a condition 
> of
> calling close?
> 
> like 
> failed:
>   if (fd >= 0)
>   close(fd);
> 
That would fix the problem of calling goto failed with fd set to -1, but
would not fix the resource issue that coverity was complaining about. We
were allocating a stdio FILE object, then taking just the fileno of it
and letting the file number go out of scope. This cleans that up,
so that we just use file numbers and properly close the FILE * once it's
outlived its usefulness.

BTW: I did investigate using open and O_TMPFILE in place of the tmpfile()
call, but while it would work great on Linux, it's not available
elsewhere, so tmpfile looks the best option.

Regards,
/Bruce


Re: [dpdk-dev] [PATCH] pmdinfogen: fix resource leak of FILE object

2018-02-02 Thread Bruce Richardson
On Fri, Feb 02, 2018 at 03:47:43PM +, Bruce Richardson wrote:
> On Fri, Feb 02, 2018 at 07:44:39AM -0500, Neil Horman wrote:
> > On Fri, Feb 02, 2018 at 12:00:58PM +, Bruce Richardson wrote:
> > > Coverity flags an issue where the resources used by the FILE object for
> > > the temporary input file are leaked. This is a very minor issue, but is
> > > easily fixed, while also avoiding later problems where we try to close
> > > an invalid file descriptor in the failure case.
> > > 
> > > The fix is to use "dup()" to get a new file descriptor number rather than
> > > using the value directly from fileno. This allows us to close the file
> > > opened with tmpfile() within in scope block, while allowing the duplicate
> > > to pass to the outer block and be closed when the function terminates.
> > > 
> > > As a side-effect I/O in the function is therefore changed from using stdio
> > > fread/fwrite to read/write system calls.
> > > 
> > > Coverity issue: 260399
> > > Fixes: 0d68533617e3 ("pmdinfogen: allow using stdin and stdout")
> > > 
> > > Signed-off-by: Bruce Richardson 
> > > ---
> > >  buildtools/pmdinfogen/pmdinfogen.c | 16 ++--
> > >  1 file changed, 10 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/buildtools/pmdinfogen/pmdinfogen.c 
> > > b/buildtools/pmdinfogen/pmdinfogen.c
> > > index 45b267346..0f35ca46b 100644
> > > --- a/buildtools/pmdinfogen/pmdinfogen.c
> > > +++ b/buildtools/pmdinfogen/pmdinfogen.c
> > > @@ -50,20 +50,24 @@ static void *grab_file(const char *filename, unsigned 
> > > long *size)
> > >   /* from stdin, use a temporary file to mmap */
> > >   FILE *infile;
> > >   char buffer[1024];
> > > - size_t n;
> > > + int n;
> > >  
> > >   infile = tmpfile();
> > >   if (infile == NULL) {
> > >   perror("tmpfile");
> > >   return NULL;
> > >   }
> > > - while (!feof(stdin)) {
> > > - n = fread(buffer, 1, sizeof(buffer), stdin);
> > > - if (fwrite(buffer, 1, n, infile) != n)
> > > + fd = dup(fileno(infile));
> > > + fclose(infile);
> > > + if (fd < 0)
> > > + return NULL;
> > > +
> > > + n = read(STDIN_FILENO, buffer, sizeof(buffer));
> > > + while (n > 0) {
> > > + if (write(fd, buffer, n) != n)
> > >   goto failed;
> > > + n = read(STDIN_FILENO, buffer, sizeof(buffer));
> > >   }
> > > - fflush(infile);
> > > - fd = fileno(infile);
> > >   }
> > >  
> > >   if (fstat(fd, &st))
> > > -- 
> > > 2.14.3
> > > 
> > > 
> > 
> > Wouldn't it be just as good, and easier to check fd for == -1 as a 
> > condition of
> > calling close?
> > 
> > like 
> > failed:
> > if (fd >= 0)
> > close(fd);
> > 
> That would fix the problem of calling goto failed with fd set to -1, but
> would not fix the resource issue that coverity was complaining about. We
> were allocating a stdio FILE object, then taking just the fileno of it
> and letting the file number go out of scope. This cleans this that up,
s/file number/FILE object ptr/

> so that we just use file numbers and properly close the FILE * once it's
> outlived its usefulness.
> 
> BTW: I did investigate using open and O_TMPFILE in place of tmpfile()
> call, but while it would work great on Linux, it's not available
> elsewhere, so tmpfile looks the best option.
> 
> Regards,
> /Bruce


Re: [dpdk-dev] [PATCH 2/2] net/sfc: add support for meson build

2018-02-02 Thread Bruce Richardson
On Fri, Feb 02, 2018 at 12:43:58PM +, Andrew Rybchenko wrote:
> From: Ivan Malov 
> 
> Signed-off-by: Ivan Malov 
> Signed-off-by: Andrew Rybchenko 
> ---
>  drivers/net/meson.build  |  2 +-
>  drivers/net/sfc/base/meson.build | 69 
> 
>  drivers/net/sfc/meson.build  | 61 +++
>  3 files changed, 131 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/net/sfc/base/meson.build
>  create mode 100644 drivers/net/sfc/meson.build

Hi,

looks pretty good in general. I have a few comments below to improve
things.

Unfortunately, this will break the ARM builds in its current form too
(or at least it broke the builds using the cross-files that I tested).
I think you need to add a check at the top of the driver meson.build
file for unsupported architectures, and set "build = false" for those
platforms.

/Bruce


> diff --git a/drivers/net/sfc/meson.build b/drivers/net/sfc/meson.build
> new file mode 100644
> index 000..b603579
> --- /dev/null
> +++ b/drivers/net/sfc/meson.build
> @@ -0,0 +1,61 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +#
> +# Copyright (c) 2016-2018 Solarflare Communications Inc.
> +# All rights reserved.
> +#
> +# This software was jointly developed between OKTET Labs (under contract
> +# for Solarflare) and Solarflare Communications, Inc.
> +
> +allow_experimental_apis = true
> +
> +extra_flags = []
> +extra_flags += '-I' + meson.current_source_dir() + '/base'
> +extra_flags += '-I' + meson.current_source_dir()

The driver's own directory is already set in the include path, so it
should not be necessary to add it as a cflag. For the base folder, the
"includes" variable should be used rather than the cflags one. These
two lines should just be replaced by:

includes += include_directories('base')

You may also need to put "include_directories: includes," into the
static_library call in the base folder if you have things being included
by base files from the root folder too.

> +extra_flags += '-O3'

The optimisation level is set for the project as a whole, and should not
be overridden in the driver. Otherwise a debug build will not be a debug
build for your driver.



Re: [dpdk-dev] [PATCH 1/2] net/sfc: rename version map file

2018-02-02 Thread Bruce Richardson
On Fri, Feb 02, 2018 at 12:43:57PM +, Andrew Rybchenko wrote:
> From: Ivan Malov 
> 
> The version map filename does not comply with the format
> used by meson build rules for drivers (i.e. on the upper
> level) and needs to be revisited. This patch removes efx
> postfix from the driver title in the filename.
> 
> Signed-off-by: Ivan Malov 
> Signed-off-by: Andrew Rybchenko 

Acked-by: Bruce Richardson 



[dpdk-dev] [PATCH v2 2/4] net/mlx: fix missing includes for rdma-core glue

2018-02-02 Thread Adrien Mazarguil
For consistency since these includes are already pulled by others.

Fixes: 6aca97d310 ("net/mlx4: move rdma-core calls to separate file")
Fixes: 7202118686 ("net/mlx5: move rdma-core calls to separate file")

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx4/mlx4_glue.c | 3 +++
 drivers/net/mlx4/mlx4_glue.h | 3 +++
 drivers/net/mlx5/mlx5_glue.h | 1 +
 3 files changed, 7 insertions(+)

diff --git a/drivers/net/mlx4/mlx4_glue.c b/drivers/net/mlx4/mlx4_glue.c
index 30797bd2b..47ae7ad0f 100644
--- a/drivers/net/mlx4/mlx4_glue.c
+++ b/drivers/net/mlx4/mlx4_glue.c
@@ -3,6 +3,9 @@
  * Copyright 2018 Mellanox
  */
 
+#include 
+#include 
+
 /* Verbs headers do not support -pedantic. */
 #ifdef PEDANTIC
 #pragma GCC diagnostic ignored "-Wpedantic"
diff --git a/drivers/net/mlx4/mlx4_glue.h b/drivers/net/mlx4/mlx4_glue.h
index 0623511f2..de251c622 100644
--- a/drivers/net/mlx4/mlx4_glue.h
+++ b/drivers/net/mlx4/mlx4_glue.h
@@ -6,6 +6,9 @@
 #ifndef MLX4_GLUE_H_
 #define MLX4_GLUE_H_
 
+#include 
+#include 
+
 /* Verbs headers do not support -pedantic. */
 #ifdef PEDANTIC
 #pragma GCC diagnostic ignored "-Wpedantic"
diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h
index 6afb629ff..7fed302ba 100644
--- a/drivers/net/mlx5/mlx5_glue.h
+++ b/drivers/net/mlx5/mlx5_glue.h
@@ -6,6 +6,7 @@
 #ifndef MLX5_GLUE_H_
 #define MLX5_GLUE_H_
 
+#include 
 #include 
 
 /* Verbs headers do not support -pedantic. */
-- 
2.11.0


[dpdk-dev] [PATCH v2 3/4] net/mlx: version rdma-core glue libraries

2018-02-02 Thread Adrien Mazarguil
When built as separate objects, these libraries do not have unique names.
Since they do not maintain a stable ABI, loading an incompatible library
may result in a crash (e.g. in case multiple versions are installed).

This patch addresses the above by versioning glue libraries, both on the
file system (version suffix) and by comparing a dedicated version field
member in glue structures.

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx4/Makefile| 8 ++--
 drivers/net/mlx4/mlx4.c  | 5 +
 drivers/net/mlx4/mlx4_glue.c | 1 +
 drivers/net/mlx4/mlx4_glue.h | 6 ++
 drivers/net/mlx5/Makefile| 8 ++--
 drivers/net/mlx5/mlx5.c  | 5 +
 drivers/net/mlx5/mlx5_glue.c | 1 +
 drivers/net/mlx5/mlx5_glue.h | 6 ++
 8 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index c004ac71c..cc9db9977 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -33,7 +33,9 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 # Library name.
 LIB = librte_pmd_mlx4.a
-LIB_GLUE = librte_pmd_mlx4_glue.so
+LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
+LIB_GLUE_BASE = librte_pmd_mlx4_glue.so
+LIB_GLUE_VERSION = 18.02.1
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
@@ -64,6 +66,7 @@ CFLAGS += -D_XOPEN_SOURCE=600
 CFLAGS += $(WERROR_FLAGS)
 ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y)
 CFLAGS += -DMLX4_GLUE='"$(LIB_GLUE)"'
+CFLAGS += -DMLX4_GLUE_VERSION='"$(LIB_GLUE_VERSION)"'
 CFLAGS_mlx4_glue.o += -fPIC
 LDLIBS += -ldl
 else
@@ -131,6 +134,7 @@ $(LIB): $(LIB_GLUE)
 
 $(LIB_GLUE): mlx4_glue.o
$Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \
+   -Wl,-h,$(LIB_GLUE) \
-s -shared -o $@ $< -libverbs -lmlx4
 
 mlx4_glue.o: mlx4_autoconf.h
@@ -139,6 +143,6 @@ endif
 
 clean_mlx4: FORCE
$Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new
-   $Q rm -f -- mlx4_glue.o $(LIB_GLUE)
+   $Q rm -f -- mlx4_glue.o $(LIB_GLUE_BASE)*
 
 clean: clean_mlx4
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 201d39b6e..61a852fb9 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -808,6 +808,11 @@ rte_mlx4_pmd_init(void)
assert(((const void *const *)mlx4_glue)[i]);
}
 #endif
+   if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) {
+   ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required",
+ mlx4_glue->version, MLX4_GLUE_VERSION);
+   return;
+   }
mlx4_glue->fork_init();
rte_pci_register(&mlx4_driver);
 }
diff --git a/drivers/net/mlx4/mlx4_glue.c b/drivers/net/mlx4/mlx4_glue.c
index 47ae7ad0f..3b79d320e 100644
--- a/drivers/net/mlx4/mlx4_glue.c
+++ b/drivers/net/mlx4/mlx4_glue.c
@@ -240,6 +240,7 @@ mlx4_glue_dv_set_context_attr(struct ibv_context *context,
 }
 
 const struct mlx4_glue *mlx4_glue = &(const struct mlx4_glue){
+   .version = MLX4_GLUE_VERSION,
.fork_init = mlx4_glue_fork_init,
.get_async_event = mlx4_glue_get_async_event,
.ack_async_event = mlx4_glue_ack_async_event,
diff --git a/drivers/net/mlx4/mlx4_glue.h b/drivers/net/mlx4/mlx4_glue.h
index de251c622..368f906bf 100644
--- a/drivers/net/mlx4/mlx4_glue.h
+++ b/drivers/net/mlx4/mlx4_glue.h
@@ -19,7 +19,13 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#ifndef MLX4_GLUE_VERSION
+#define MLX4_GLUE_VERSION ""
+#endif
+
+/* LIB_GLUE_VERSION must be updated every time this structure is modified. */
 struct mlx4_glue {
+   const char *version;
int (*fork_init)(void);
int (*get_async_event)(struct ibv_context *context,
   struct ibv_async_event *event);
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 4b20d718b..4086f2039 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -33,7 +33,9 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 # Library name.
 LIB = librte_pmd_mlx5.a
-LIB_GLUE = librte_pmd_mlx5_glue.so
+LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
+LIB_GLUE_BASE = librte_pmd_mlx5_glue.so
+LIB_GLUE_VERSION = 18.02.1
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
@@ -74,6 +76,7 @@ CFLAGS += $(WERROR_FLAGS)
 CFLAGS += -Wno-strict-prototypes
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 CFLAGS += -DMLX5_GLUE='"$(LIB_GLUE)"'
+CFLAGS += -DMLX5_GLUE_VERSION='"$(LIB_GLUE_VERSION)"'
 CFLAGS_mlx5_glue.o += -fPIC
 LDLIBS += -ldl
 else
@@ -180,6 +183,7 @@ $(LIB): $(LIB_GLUE)
 
 $(LIB_GLUE): mlx5_glue.o
$Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \
+   -Wl,-h,$(LIB_GLUE) \
-s -shared -o $@ $< -libverbs -lmlx5
 
 mlx5_glue.o: mlx5_autoconf.h
@@ -188,6 +192,6 @@ endif
 
 clean_mlx5: FORCE
$Q rm -f -- mlx5_autoconf.h mlx5_autoconf.h.new
-   $Q rm -f -- mlx5_glue.o $(LIB_GLUE)
+   $Q rm -f -- mlx5_glue.o $(LIB_GLUE_BASE)*
 
 clean: clean_mlx5
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 050cfac0d..341230d2b 1

[dpdk-dev] [PATCH v2 1/4] net/mlx: add debug checks to glue structure

2018-02-02 Thread Adrien Mazarguil
This code should catch mistakes early if a glue structure member is added
without a corresponding implementation in the library.

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/mlx4/mlx4.c | 9 +
 drivers/net/mlx5/mlx5.c | 9 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 50a55ee52..201d39b6e 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -799,6 +799,15 @@ rte_mlx4_pmd_init(void)
return;
assert(mlx4_glue);
 #endif
+#ifndef NDEBUG
+   /* Glue structure must not contain any NULL pointers. */
+   {
+   unsigned int i;
+
+   for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i)
+   assert(((const void *const *)mlx4_glue)[i]);
+   }
+#endif
mlx4_glue->fork_init();
rte_pci_register(&mlx4_driver);
 }
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 544599b01..050cfac0d 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1142,6 +1142,15 @@ rte_mlx5_pmd_init(void)
return;
assert(mlx5_glue);
 #endif
+#ifndef NDEBUG
+   /* Glue structure must not contain any NULL pointers. */
+   {
+   unsigned int i;
+
+   for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
+   assert(((const void *const *)mlx5_glue)[i]);
+   }
+#endif
mlx5_glue->fork_init();
rte_pci_register(&mlx5_driver);
 }
-- 
2.11.0


[dpdk-dev] [PATCH v2 4/4] net/mlx: make rdma-core glue path configurable

2018-02-02 Thread Adrien Mazarguil
Since rdma-core glue libraries are intrinsically tied to their respective
PMDs and used as internal plug-ins, their presence in the default search
path among other system libraries for the dynamic linker is not necessarily
desired.

This commit enables their installation and subsequent look-up at run time
in RTE_EAL_PMD_PATH if configured to a nonempty string. This path can also
be overridden by environment variables MLX[45]_GLUE_PATH.

Signed-off-by: Adrien Mazarguil 
---
 doc/guides/nics/mlx4.rst | 17 +
 doc/guides/nics/mlx5.rst | 14 ++
 drivers/net/mlx4/mlx4.c  | 43 ++-
 drivers/net/mlx5/mlx5.c  | 43 ++-
 4 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
index 88161781c..9e4fbf692 100644
--- a/doc/guides/nics/mlx4.rst
+++ b/doc/guides/nics/mlx4.rst
@@ -97,6 +97,11 @@ These options can be modified in the ``.config`` file.
   ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as
   missing with ``ldd(1)``.
 
+  It works by moving these dependencies to a purpose-built rdma-core "glue"
+  plug-in, which must either be installed in ``CONFIG_RTE_EAL_PMD_PATH`` if
+  set, or in a standard location for the dynamic linker (e.g. ``/lib``) if
+  left to the default empty string (``""``).
+
   This option has no performance impact.
 
 - ``CONFIG_RTE_LIBRTE_MLX4_DEBUG`` (default **n**)
@@ -113,6 +118,18 @@ These options can be modified in the ``.config`` file.
 
   This value is always 1 for RX queues since they use a single MP.
 
+Environment variables
+~~~~~~~~~~~~~~~~~~~~~
+
+- ``MLX4_GLUE_PATH``
+
+  A list of directories in which to search for the rdma-core "glue" plug-in,
+  separated by colons or semi-colons.
+
+  Only matters when compiled with ``CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS``
+  enabled and most useful when ``CONFIG_RTE_EAL_PMD_PATH`` is also set,
+  since ``LD_LIBRARY_PATH`` has no effect in this case.
+
 Run-time configuration
 ~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a9e4bf51a..1635dff2b 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -170,6 +170,11 @@ These options can be modified in the ``.config`` file.
   ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as
   missing with ``ldd(1)``.
 
+  It works by moving these dependencies to a purpose-built rdma-core "glue"
+  plug-in, which must either be installed in ``CONFIG_RTE_EAL_PMD_PATH`` if
+  set, or in a standard location for the dynamic linker (e.g. ``/lib``) if
+  left to the default empty string (``""``).
+
   This option has no performance impact.
 
 - ``CONFIG_RTE_LIBRTE_MLX5_DEBUG`` (default **n**)
@@ -189,6 +194,15 @@ These options can be modified in the ``.config`` file.
 Environment variables
 ~~~~~~~~~~~~~~~~~~~~~
 
+- ``MLX5_GLUE_PATH``
+
+  A list of directories in which to search for the rdma-core "glue" plug-in,
+  separated by colons or semi-colons.
+
+  Only matters when compiled with ``CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS``
+  enabled and most useful when ``CONFIG_RTE_EAL_PMD_PATH`` is also set,
+  since ``LD_LIBRARY_PATH`` has no effect in this case.
+
 - ``MLX5_PMD_ENABLE_PADDING``
 
   Enables HW packet padding in PCI bus transactions.
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 61a852fb9..4016ddb7b 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -741,11 +741,52 @@ static struct rte_pci_driver mlx4_driver = {
 static int
 mlx4_glue_init(void)
 {
+   const char *path[] = {
+   /*
+* A basic security check is necessary before trusting
+* MLX4_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
+*/
+   (geteuid() == getuid() && getegid() == getgid() ?
+getenv("MLX4_GLUE_PATH") : NULL),
+   RTE_EAL_PMD_PATH,
+   };
+   unsigned int i = 0;
void *handle = NULL;
void **sym;
const char *dlmsg;
 
-   handle = dlopen(MLX4_GLUE, RTLD_LAZY);
+   while (!handle && i != RTE_DIM(path)) {
+   const char *end;
+   size_t len;
+   int ret;
+
+   if (!path[i]) {
+   ++i;
+   continue;
+   }
+   end = strpbrk(path[i], ":;");
+   if (!end)
+   end = path[i] + strlen(path[i]);
+   len = end - path[i];
+   ret = 0;
+   do {
+   char name[ret + 1];
+
+   ret = snprintf(name, sizeof(name), "%.*s%s" MLX4_GLUE,
+  (int)len, path[i],
+  (!len || *(end - 1) == '/') ? "" : "/");
+   if (ret == -1)
+   break;
+   if (sizeof(name) != (size_t)ret

[dpdk-dev] [PATCH v2 0/4] net/mlx: enhance rdma-core glue configuration

2018-02-02 Thread Adrien Mazarguil
The decision to deliver mlx4/mlx5 rdma-core glue plug-ins separately instead
of generating them at run time due to security concerns [1] led to a few
issues:

- They must be present on the file system before running DPDK.
- Their location must be known to the dynamic linker.
- Their names overlap and ABI compatibility is not guaranteed, which may
  lead to crashes.

This series addresses the above by adding version information to plug-ins
and taking CONFIG_RTE_EAL_PMD_PATH into account to locate them on the file
system.

[1] http://dpdk.org/ml/archives/dev/2018-January/089617.html

v2 changes:

- Fixed extra "\n" in glue file name generation (although it didn't break
  functionality).

Adrien Mazarguil (4):
  net/mlx: add debug checks to glue structure
  net/mlx: fix missing includes for rdma-core glue
  net/mlx: version rdma-core glue libraries
  net/mlx: make rdma-core glue path configurable

 doc/guides/nics/mlx4.rst | 17 
 doc/guides/nics/mlx5.rst | 14 ++
 drivers/net/mlx4/Makefile|  8 --
 drivers/net/mlx4/mlx4.c  | 57 ++-
 drivers/net/mlx4/mlx4_glue.c |  4 +++
 drivers/net/mlx4/mlx4_glue.h |  9 +++
 drivers/net/mlx5/Makefile|  8 --
 drivers/net/mlx5/mlx5.c  | 57 ++-
 drivers/net/mlx5/mlx5_glue.c |  1 +
 drivers/net/mlx5/mlx5_glue.h |  7 +
 10 files changed, 176 insertions(+), 6 deletions(-)

-- 
2.11.0


Re: [dpdk-dev] [PATCH v2 0/4] net/mlx: enhance rdma-core glue configuration

2018-02-02 Thread Nélio Laranjeiro
On Fri, Feb 02, 2018 at 05:46:10PM +0100, Adrien Mazarguil wrote:
> The decision to deliver mlx4/mlx5 rdma-core glue plug-ins separately instead
> of generating them at run time due to security concerns [1] led to a few
> issues:
> 
> - They must be present on the file system before running DPDK.
> - Their location must be known to the dynamic linker.
> - Their names overlap and ABI compatibility is not guaranteed, which may
>   lead to crashes.
> 
> This series addresses the above by adding version information to plug-ins
> and taking CONFIG_RTE_EAL_PMD_PATH into account to locate them on the file
> system.
> 
> [1] http://dpdk.org/ml/archives/dev/2018-January/089617.html
> 
> v2 changes:
> 
> - Fixed extra "\n" in glue file name generation (although it didn't break
>   functionality).
> 
> Adrien Mazarguil (4):
>   net/mlx: add debug checks to glue structure
>   net/mlx: fix missing includes for rdma-core glue
>   net/mlx: version rdma-core glue libraries
>   net/mlx: make rdma-core glue path configurable
> 
>  doc/guides/nics/mlx4.rst | 17 
>  doc/guides/nics/mlx5.rst | 14 ++
>  drivers/net/mlx4/Makefile|  8 --
>  drivers/net/mlx4/mlx4.c  | 57 ++-
>  drivers/net/mlx4/mlx4_glue.c |  4 +++
>  drivers/net/mlx4/mlx4_glue.h |  9 +++
>  drivers/net/mlx5/Makefile|  8 --
>  drivers/net/mlx5/mlx5.c  | 57 ++-
>  drivers/net/mlx5/mlx5_glue.c |  1 +
>  drivers/net/mlx5/mlx5_glue.h |  7 +
>  10 files changed, 176 insertions(+), 6 deletions(-)
> 
> -- 
> 2.11.0

For the series,

Acked-by: Nelio Laranjeiro 

-- 
Nélio Laranjeiro
6WIND


[dpdk-dev] I40E VF nullptr Dereference: rx_mbuf_alloc_failed

2018-02-02 Thread Stefan Baranoff
All,

I was unclear if this should be usage or dev but it seemed like a dev issue
to me.

I'm on DPDK 16.11.2 (CentOS packages) using the I40EVF driver and in the
case of rx_mbuf_alloc_failed there is a null pointer dereference in
drivers/net/i40e/i40e_rxtx.c line 830. The variable 'dev' is null.

Looking at it right after initializing the port/queues it appears the
source of that variable
((struct i40e_rx_queue*)rte_eth_devices[0].data->rx_queues[0])->vsi->
adapter->eth_dev
is null as well. I couldn't find anywhere in the code base that initialized
that variable.

What am I missing/not doing right? Where is eth_dev supposed to be
initialized and is that just missing or is vsi->adapter->eth_dev the wrong
structure to be using in the rx_mbuf_alloc_failed case?


Thanks,
Stefan


Re: [dpdk-dev] [PATCH v3 1/6] test: fix memory leak in bitmap test

2018-02-02 Thread Thomas Monjalon
02/02/2018 11:31, Burakov, Anatoly:
> On 02-Feb-18 9:08 AM, Thomas Monjalon wrote:
> > 01/02/2018 18:04, Burakov, Anatoly:
> >> On 01-Feb-18 12:10 AM, Thomas Monjalon wrote:
> >>> 17/01/2018 12:15, Anatoly Burakov:
>  Acked-by: Cristian Dumitrescu 
> 
>  Fixes: c7e4a134e769 ("test: verify bitmap operations")
>  Cc: pbhagavat...@caviumnetworks.com
> 
>  Signed-off-by: Anatoly Burakov 
> >>>
> >>> I think you forgot to carry over some previous acks in this series.
> >>>
> >>>
> >>>
> >>
> >> Which ones were there? I can only see two new acks for v3 from Olivier.
> >> Everything else seems in order, unless my email client is lying to me :)
> > 
> > There are some acks from Cristian.
> > 
> They are all already in the patchset.

Oh, I didn't see them because they are at the beginning or middle of
the message. They should be in chronological order after your Signed-off.



[dpdk-dev] [PATCH] compressdev: implement API

2018-02-02 Thread Fiona Trahe
With the vast amounts of data being transported around networks
and stored in storage systems, reducing data size is becoming ever
more important.

There are both software libraries and hardware devices available
that provide compression, but no common API.
This API is proposed in this commit, which supports the following features:

- Deflate Algorithm (https://tools.ietf.org/html/rfc1951)
- LZS algorithm (https://tools.ietf.org/html/rfc2395)
- Static and Dynamic Huffman encoding.
- Compression levels
- Checksum generation
- Asynchronous burst API
- Session-based (a session contains immutable data only and is useable across 
devices)
- stream-based to maintain state and history data for stateful flows.

Signed-off-by: Fiona Trahe 
Signed-off-by: Pablo de Lara 
---

Changes since RFCv3:

- Added missing dependencies for shared lib
- Used SPDX license header
- Used dynamic logging
- Changed window size type to uint32_t
- Removed some unnecessary API
- Replaced phys_addr_t with rte_iova_t
- Resolved checkpatch issues
- Resolved Doxygen issues
- Added default max nb qps for virtual pmds
- Removed unnecessary extern keywords
- Resolved enqueue/dequeued prototype issue
- Resolved capabilities issues
- Completed API documentation
- Added missing API in version.map file
- Added experimental tag
- Added compressdev Doxygen doc in index
- Deleted TODOs
- Deleted unused event callback mechanism
- Clarified flush flag and window size behaviour
- Added capability RTE_COMP_FF_NONCOMPRESSED_BLOCKS
- Simplified xform struct - no need for separate
  common and stateful structs
- Renamed device features to COMPDEV to distinguish from
  service features

Items to be addressed in v2
 - Add hash feature
 - Implement stream functions
 - Add int rte_comp_stream_create_in_op_priv()
 - Add meson build
 - Set/clear cache-aligned keyword
 - Description of stateless/stateful behaviour in
   rte_comp_enqueue_burst function header
 - Add capability helper functions 

 config/common_base |   6 +
 doc/api/doxy-api-index.md  |   1 +
 doc/api/doxy-api.conf  |   1 +
 lib/Makefile   |   3 +
 lib/librte_compressdev/Makefile|  29 +
 lib/librte_compressdev/rte_comp.h  | 503 
 lib/librte_compressdev/rte_compressdev.c   | 902 +
 lib/librte_compressdev/rte_compressdev.h   | 757 +
 lib/librte_compressdev/rte_compressdev_pmd.c   | 163 
 lib/librte_compressdev/rte_compressdev_pmd.h   | 439 ++
 lib/librte_compressdev/rte_compressdev_version.map |  47 ++
 lib/librte_eal/common/include/rte_log.h|   1 +
 mk/rte.app.mk  |   1 +
 13 files changed, 2853 insertions(+)
 create mode 100644 lib/librte_compressdev/Makefile
 create mode 100644 lib/librte_compressdev/rte_comp.h
 create mode 100644 lib/librte_compressdev/rte_compressdev.c
 create mode 100644 lib/librte_compressdev/rte_compressdev.h
 create mode 100644 lib/librte_compressdev/rte_compressdev_pmd.c
 create mode 100644 lib/librte_compressdev/rte_compressdev_pmd.h
 create mode 100644 lib/librte_compressdev/rte_compressdev_version.map

diff --git a/config/common_base b/config/common_base
index ad03cf4..e0e5768 100644
--- a/config/common_base
+++ b/config/common_base
@@ -535,6 +535,12 @@ CONFIG_RTE_LIBRTE_PMD_MRVL_CRYPTO=n
 CONFIG_RTE_LIBRTE_PMD_MRVL_CRYPTO_DEBUG=n
 
 #
+# Compile generic compression device library
+#
+CONFIG_RTE_LIBRTE_COMPRESSDEV=y
+CONFIG_RTE_COMPRESS_MAX_DEVS=64
+
+#
 # Compile generic security library
 #
 CONFIG_RTE_LIBRTE_SECURITY=y
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index d77f205..07b8e75 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -43,6 +43,7 @@ The public API headers are grouped by topics:
   [rte_tm] (@ref rte_tm.h),
   [rte_mtr](@ref rte_mtr.h),
   [bbdev]  (@ref rte_bbdev.h),
+  [compressdev](@ref rte_compressdev.h),
   [cryptodev]  (@ref rte_cryptodev.h),
   [security]   (@ref rte_security.h),
   [eventdev]   (@ref rte_eventdev.h),
diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf
index cda52fd..06432c3 100644
--- a/doc/api/doxy-api.conf
+++ b/doc/api/doxy-api.conf
@@ -45,6 +45,7 @@ INPUT   = doc/api/doxy-api-index.md \
   lib/librte_cfgfile \
   lib/librte_cmdline \
   lib/librte_compat \
+  lib/librte_compressdev \
   lib/librte_cryptodev \
   lib/librte_distributor \
   lib/librte_efd \
diff --git a/lib/Makefile b/lib/Makefile
index ec965a6..19396da 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -26,6 +26,9 @@ DEPDIRS-librte_bbdev := librte_eal librte_mempool librte_m

Re: [dpdk-dev] [PATCH] pmdinfogen: fix resource leak of FILE object

2018-02-02 Thread Neil Horman
On Fri, Feb 02, 2018 at 03:51:12PM +, Bruce Richardson wrote:
> On Fri, Feb 02, 2018 at 03:47:43PM +, Bruce Richardson wrote:
> > On Fri, Feb 02, 2018 at 07:44:39AM -0500, Neil Horman wrote:
> > > On Fri, Feb 02, 2018 at 12:00:58PM +, Bruce Richardson wrote:
> > > > Coverity flags an issue where the resources used by the FILE object for
> > > > the temporary input file are leaked. This is a very minor issue, but is
> > > > easily fixed, while also avoiding later problems where we try to close
> > > > an invalid file descriptor in the failure case.
> > > > 
> > > > The fix is to use "dup()" to get a new file descriptor number rather 
> > > > than
> > > > using the value directly from fileno. This allows us to close the file
> > > > opened with tmpfile() within the inner scope block, while allowing the 
> > > > duplicate
> > > > to pass to the outer block and be closed when the function terminates.
> > > > 
> > > > As a side-effect I/O in the function is therefore changed from using 
> > > > stdio
> > > > fread/fwrite to read/write system calls.
> > > > 
> > > > Coverity issue: 260399
> > > > Fixes: 0d68533617e3 ("pmdinfogen: allow using stdin and stdout")
> > > > 
> > > > Signed-off-by: Bruce Richardson 
> > > > ---
> > > >  buildtools/pmdinfogen/pmdinfogen.c | 16 ++--
> > > >  1 file changed, 10 insertions(+), 6 deletions(-)
> > > > 
> > > > diff --git a/buildtools/pmdinfogen/pmdinfogen.c 
> > > > b/buildtools/pmdinfogen/pmdinfogen.c
> > > > index 45b267346..0f35ca46b 100644
> > > > --- a/buildtools/pmdinfogen/pmdinfogen.c
> > > > +++ b/buildtools/pmdinfogen/pmdinfogen.c
> > > > @@ -50,20 +50,24 @@ static void *grab_file(const char *filename, 
> > > > unsigned long *size)
> > > > /* from stdin, use a temporary file to mmap */
> > > > FILE *infile;
> > > > char buffer[1024];
> > > > -   size_t n;
> > > > +   int n;
> > > >  
> > > > infile = tmpfile();
> > > > if (infile == NULL) {
> > > > perror("tmpfile");
> > > > return NULL;
> > > > }
> > > > -   while (!feof(stdin)) {
> > > > -   n = fread(buffer, 1, sizeof(buffer), stdin);
> > > > -   if (fwrite(buffer, 1, n, infile) != n)
> > > > +   fd = dup(fileno(infile));
> > > > +   fclose(infile);
> > > > +   if (fd < 0)
> > > > +   return NULL;
> > > > +
> > > > +   n = read(STDIN_FILENO, buffer, sizeof(buffer));
> > > > +   while (n > 0) {
> > > > +   if (write(fd, buffer, n) != n)
> > > > goto failed;
> > > > +   n = read(STDIN_FILENO, buffer, sizeof(buffer));
> > > > }
> > > > -   fflush(infile);
> > > > -   fd = fileno(infile);
> > > > }
> > > >  
> > > > if (fstat(fd, &st))
> > > > -- 
> > > > 2.14.3
> > > > 
> > > > 
> > > 
> > > Wouldn't it be just as good, and easier to check fd for == -1 as a 
> > > condition of
> > > calling close?
> > > 
> > > like 
> > > failed:
> > >   if (fd >= 0)
> > >   close(fd);
> > > 
> > That would fix the problem of calling goto failed with fd set to -1, but
> > would not fix the resource issue that coverity was complaining about. We
> > were allocating a stdio FILE object, then taking just the fileno of it
> > and letting the file number go out of scope. This cleans that up,
> s/file number/FILE object ptr/
> 
Yeah, Ok, I can see that, though I still think it's a bit of a false positive,
since the definition of tmpfile says it will automatically unlink the file on
process exit.  No matter though, what you have is an improvement regardless.

> > so that we just use file numbers and properly close the FILE * once it's
> > outlived its usefulness.
> > 
> > BTW: I did investigate using open and O_TMPFILE in place of tmpfile()
> > call, but while it would work great on Linux, it's not available
> > elsewhere, so tmpfile looks the best option.
> > 
yeah, that's both OS and filesystem specific, I wouldn't trust it too much.

Acked-by: Neil Horman 

> > Regards,
> > /Bruce
> 


Re: [dpdk-dev] [RFC v2 00/23] Dynamic memory allocation for DPDK

2018-02-02 Thread Yongseok Koh
On Tue, Dec 26, 2017 at 05:19:25PM +, Walker, Benjamin wrote:
> On Fri, 2017-12-22 at 09:13 +, Burakov, Anatoly wrote:
> > On 21-Dec-17 9:38 PM, Walker, Benjamin wrote:
> > > SPDK will need some way to register for a notification when pages are
> > > allocated
> > > or freed. For storage, the number of requests per second is (relative to
> > > networking) fairly small (hundreds of thousands per second in a 
> > > traditional
> > > block storage stack, or a few million per second with SPDK). Given that, 
> > > we
> > > can
> > > afford to do a dynamic lookup from va to pa/iova on each request in order 
> > > to
> > > greatly simplify our APIs (users can just pass pointers around instead of
> > > mbufs). DPDK has a way to lookup the pa from a given va, but it does so by
> > > scanning /proc/self/pagemap and is very slow. SPDK instead handles this by
> > > implementing a lookup table of va to pa/iova which we populate by scanning
> > > through the DPDK memory segments at start up, so the lookup in our table 
> > > is
> > > sufficiently fast for storage use cases. If the list of memory segments
> > > changes,
> > > we need to know about it in order to update our map.
> > 
> > Hi Benjamin,
> > 
> > So, in other words, we need callbacks on alloc/free. What information 
> > would SPDK need when receiving this notification? Since we can't really 
> > know in advance how many pages we allocate (it may be one, it may be a 
> > thousand) and they no longer are guaranteed to be contiguous, would a 
> > per-page callback be OK? Alternatively, we could have one callback per 
> > operation, but only provide VA and size of allocated memory, while 
> > leaving everything else to the user. I do add a virt2memseg() function 
> > which would allow you to look up segment physical addresses easier, so
> > you won't have to manually scan memseg lists to get IOVA for a given VA.
> > 
> > Thanks for your feedback and suggestions!
> 
> Yes - callbacks on alloc/free would be perfect. Ideally for us we want one
> callback per virtual memory region allocated, plus a function we can call to
> find the physical addresses/page break points on that virtual region. The
> function that finds the physical addresses does not have to be efficient - 
> we'll
> just call that once when the new region is allocated and store the results in 
> a
> fast lookup table. One call per virtual region is better for us than one call
> per physical page because we're actually keeping multiple different types of
> memory address translation tables in SPDK. One translates from va to pa/iova, 
> so
> for this one we need to break this up into physical pages and it doesn't 
> matter
> if you do one call per virtual region or one per physical page. However 
> another
> one translates from va to RDMA lkey, so it is much more efficient if we can
> register large virtual regions in a single call.

Another yes to callbacks. As Benjamin mentioned about RDMA, the MLX PMD has to
look up the LKEY for each packet DMA. Let me briefly explain this for your
understanding. For security reasons, we don't allow an application to initiate a
DMA transaction with unknown random physical addresses. Instead, the va-to-pa
mapping (we call it a Memory Region) should be pre-registered, and the LKEY is
the index of the translation entry registered in the device. With the current
static memory model, it is easy to manage because the v-p mapping is unchanged
over time. But if it becomes dynamic, the MLX PMD should get notified of the
event so it can register/un-register the Memory Region.

For the MLX PMD, it is also enough to get one notification per allocation/free
of a virtual memory region. It doesn't necessarily have to be a per-page call,
as Benjamin mentioned, because the PA of the region doesn't need to be
contiguous for registration. But it doesn't need to know about the physical
address of the region (I'm not saying it is unnecessary, but just FYI :-).

Thanks,
Yongseok


Re: [dpdk-dev] [PATCH v3] net/mlx4: fix dev rmv not detected after port stop

2018-02-02 Thread Adrien Mazarguil
Hi Matan,

On Wed, Jan 31, 2018 at 05:07:56PM +, Matan Azrad wrote:
> Hi Adrien
> 
> From: Adrien Mazarguil , Sent: Wednesday, January 31, 2018 4:32 PM
> > Hi Matan,
> > 
> > On Wed, Jan 31, 2018 at 01:44:41PM +, Matan Azrad wrote:
> > > Hi Adrien

> > > I don't know what any application does but for me it is a mistake to
> > > stop all event processes in dev_stop(), Maybe for other application
> > maintainers too.
> > 
> > Just like you, I don't know either what all the applications ever written 
> > for
> > DPDK expect out of dev_stop(). What's for sure is that currently, LSC/RMV
> > don't occur afterward, the same way these events do not occur before
> > dev_start().
> 
> Why not? RMV event can occur any time after probe.

LSC as well (keep in mind this patch modifies the behavior for both
events). RMV events may also occur before application has a chance to
register a handler for it, in which case this approach fails to solve the
problem it's supposed to solve. Mitigate all you want, the application still
can't rely on that event only.

> > Any application possibly relying on this fact will break. In such a
> > situation, a conservative approach is better.
> 
> If an application should fail to get event in stopped state it may fail in 
> the previous code too:
> The interrupt run from host thread so the next race may occur:
> dev_start() : master thread.
> Context switch.
> RMV interrupt started to run callbacks: host thread.
> Context switch.
> dev_stop(): master thread.
> Start reconfiguration: master thread. 
> Context switch.
> Callback running.
> 
> So, the only thing which can disable callback running after dev_stop() is 
> callback unregistration before it.

After dev_stop() returns, new events cannot be triggered by the PMD which is
what matters. Obviously a callback that already started to run before that
will eventually have to complete. What's your point?

There's a race only if an application performs multiple simultaneous control
operations on the underlying device, but this has always been unsafe (not
only during RMV) because there are no locks, it's documented as such.

> > > > Setting up RMV/LSC callbacks is not the only configuration an
> > > > application usually performs before calling dev_start(). Think about
> > > > setting up flow rules, MAC addresses, VLANs, and so on, this on
> > > > multiple ports before starting them up all at once. Previously it
> > > > could be done in an unspecified order, now they have to take special 
> > > > care
> > for RMV/LSC.
> > >
> > > Or maybe their callback code is already safe for it.
> > > Or they manage the unregister\register calls in the right places.
> > 
> > That's my point, these "maybes" don't argue in favor of changing things.
> 
> What I'm saying is that callbacks should be safe or not registered in the 
> right time.

I understand that, though it's not a valid counter argument :)

> > > > Many devops are only safe when called while a device is stopped.
> > > > It's even documented in rte_ethdev.h.
> > > >
> > >
> > > And?
> > 
> > ...And applications therefore often do all their configuration in an 
> > unspecified
> > order while a port is stopped as a measure of safety. No extra care is taken
> > for RMV/LSC. This uncertainty can be addressed by not modifying the current
> > behavior.
> 
> Or they expect to get interrupt and the corner case will come later if we 
> will not change it.

Look, we're throwing opposite use cases at each other in order to make a
point, and I don't see an end to this since we're both stubborn. Let's thus
assume applications use a bit of both.

Now we're left with a problem: before this patch, neither use case was
broken. Now that it's applied, mine is broken, so let's agree something needs
to be done. Either all affected applications need to be updated, or we can
simply revert this and properly fix fail-safe instead.


> > > So, at least for RMV event, we need the notification also in stopped 
> > > state.
> > 
> > You sent the rte_eth_dev_is_removed() series. You're aware that PMDs
> > implementing this call benefit from an automatic is_removed() check on all
> > remaining devops whenever some error occur.
> > In short, an application will get notified simply by getting dev_start() 
> > (or any
> > other callback) return -EIO and not being able to use the device.
>  
> Yes, but between dev_stop to dev_start may not be any ethdev API calling.

So what, if an application is not using the device, why does it need to know
it's been removed? If it's that important, why can't it run its own periodic
rte_eth_dev_is_removed() probe?

> > PMDs that do not implement is_removed() (or in addition to it) could also
> > artificially trigger a RMV event after dev_start() is called. As long as 
> > the PMD
> > remains quiet while the device is stopped, it's fine.
> 
> How can the PMD do it after dev_start()? Initiate alarm in dev start function 
> to do it later And entering into race again?

What race? All the P

Re: [dpdk-dev] IXGBE, IOMMU DMAR DRHD handling fault issue

2018-02-02 Thread Ravi Kerur
On Fri, Feb 2, 2018 at 2:28 AM, Burakov, Anatoly 
wrote:

> On 01-Feb-18 7:26 PM, Ravi Kerur wrote:
>
>>
>>
>> On Thu, Feb 1, 2018 at 2:10 AM, Burakov, Anatoly <
>> anatoly.bura...@intel.com > wrote:
>>
>> On 31-Jan-18 9:51 PM, Ravi Kerur wrote:
>>
>>
>> Hi Anatoly,
>>
>> Thanks. I am following wiki link below which uses vIOMMU with
>> DPDK as a use-case and instantiate VM as specified with Q35
>> chipset in Qemu.
>>
>> https://wiki.qemu.org/Features/VT-d
>> 
>>
>> Qemu-version is 2.11
>> Host kernel 4.9
>> Guest kernel 4.4
>>
>> I can only guess that guest kernel needs an upgrade in my setup
>> to work correctly, if versions on my setup rings a bell on not
>> having support kindly let me know.
>>
>> When 'modprobe vfio enable_unsafe_noiommu_node=Y' is executed on
>> guest I get following error
>> ...
>> vfio: unknown parameter 'enable_unsafe_noiommu_node' ignored
>> ...
>>
>> in guest.
>>
>> Thanks.
>>
>>
>> AFAIK kernel 4.4 should have noiommu mode - it was introduced in
>> 3.1x days. However, in order for that to work, kernel also has to be
>> built with this mode enabled. My guess is, whoever is the supplier
>> of your kernel, did not do that. You should double-check the kernel
>> configuration of your distribution.
>>
>> However, if you have vIOMMU in QEMU, you shouldn't need noiommu mode
>> - "regular" vfio should work fine. noiommu mode should only be
>> needed if you know you don't have IOMMU enabled in your kernel, and
>> even if you can't enable it, you can still use igb_uio.
>>
>> Hi Anatoly,
>>
>> Do you suggest I take this discussion to kvm/qemu mailing list as I am
>> not sure which component has the issue? I check dmesg for BIOS physical
>> memory map and address reported as fault by DMAR is reported by BIOS as
>> usable on both host and vm.
>>
>> [ 4539.597737] DMAR: [DMA Read] Request device [04:10.0] fault addr
>> *33a128000 *[fault reason 06] PTE Read access is not set
>>
>> dmesg | grep BIOS
>> [0.00] e820: BIOS-provided physical RAM map:
>> [0.00] BIOS-e820: [mem 0x-0x0009afff]
>> usable
>> [0.00] BIOS-e820: [mem 0x0009b000-0x0009]
>> reserved
>> [0.00] BIOS-e820: [mem 0x000e-0x000f]
>> reserved
>> [0.00] BIOS-e820: [mem 0x0010-0x7938afff]
>> usable
>> [0.00] BIOS-e820: [mem 0x7938b000-0x7994bfff]
>> reserved
>> [0.00] BIOS-e820: [mem 0x7994c000-0x7999cfff]
>> ACPI data
>> [0.00] BIOS-e820: [mem 0x7999d000-0x79f7dfff]
>> ACPI NVS
>> [0.00] BIOS-e820: [mem 0x79f7e000-0x7bd37fff]
>> reserved
>> [0.00] BIOS-e820: [mem 0x7bd38000-0x7bd38fff]
>> usable
>> [0.00] BIOS-e820: [mem 0x7bd39000-0x7bdbefff]
>> reserved
>> [0.00] BIOS-e820: [mem 0x7bdbf000-0x7bff]
>> usable
>> [0.00] BIOS-e820: [mem 0x7c00-0x8fff]
>> reserved
>> [0.00] BIOS-e820: [mem 0xfed1c000-0xfed44fff]
>> reserved
>> [0.00] BIOS-e820: [mem 0xff00-0x]
>> reserved
>> [*0.00] BIOS-e820: [mem 0x0001-0x00407fff]
>> usable*
>> *
>> *
>> Kindly let me know your inputs.
>>
>> Thanks.
>>
>>
>> -- Thanks,
>> Anatoly
>>
>>
>>
> The "PTE Read not set" error usually indicates that you are trying to use
> a non-IOMMU method when you have IOMMU enabled (i.e. trying to use igb_uio
> when IOMMU is on). That, to me, indicates that you do have IOMMU emulation
> enabled.
>
> I would go about it this way.
>
> First, i'd ensure that your VM has IOMMU emulation enabled and working.
> You have mentioned that your QEMU version should have IOMMU emulation, so
> let's assume that's the case.
>
> I am not sure of the exact command-line needed to activate the vIOMMU
> emulation, but assuming your VM emulates an Intel processor, your kernel
> command-line should have "iommu=on intel_iommu=on" in it. Check
> /etc/default/grub for GRUB_CMDLINE_LINUX_DEFAULT value, and if the above
> values are not in there, add the above changes, do "update-grub" and reboot
> your VM.
>
> If it already did have the necessary kernel configuration, do "dmesg |
> grep IOMMU" and look for "IOMMU Enabled". That should tell you that IOMMU
> is enabled and working in the kernel.
>
> After that, you can modprobe vfio and vfio-pci, bind NICs to it, and it
> should be working. Please bear in mind that all of that is how i would've
> gone about it if i had similar problems on baremetal, but i'm hoping all of
> it is applicable to VM's. So, either disable IOMMU and use igb_uio, or
> enable IOMMU and use V

Re: [dpdk-dev] IXGBE, IOMMU DMAR DRHD handling fault issue

2018-02-02 Thread Ravi Kerur
On Fri, Feb 2, 2018 at 12:21 PM, Ravi Kerur  wrote:

>
>
> On Fri, Feb 2, 2018 at 2:28 AM, Burakov, Anatoly <
> anatoly.bura...@intel.com> wrote:
>
>> On 01-Feb-18 7:26 PM, Ravi Kerur wrote:
>>
>>>
>>>
>>> On Thu, Feb 1, 2018 at 2:10 AM, Burakov, Anatoly <
>>> anatoly.bura...@intel.com > wrote:
>>>
>>> On 31-Jan-18 9:51 PM, Ravi Kerur wrote:
>>>
>>>
>>> Hi Anatoly,
>>>
>>> Thanks. I am following wiki link below which uses vIOMMU with
>>> DPDK as a use-case and instantiate VM as specified with Q35
>>> chipset in Qemu.
>>>
>>> https://wiki.qemu.org/Features/VT-d
>>> 
>>>
>>> Qemu-version is 2.11
>>> Host kernel 4.9
>>> Guest kernel 4.4
>>>
>>> I can only guess that guest kernel needs an upgrade in my setup
>>> to work correctly, if versions on my setup rings a bell on not
>>> having support kindly let me know.
>>>
>>> When 'modprobe vfio enable_unsafe_noiommu_node=Y' is executed on
>>> guest I get following error
>>> ...
>>> vfio: unknown parameter 'enable_unsafe_noiommu_node' ignored
>>> ...
>>>
>>> in guest.
>>>
>>> Thanks.
>>>
>>>
>>> AFAIK kernel 4.4 should have noiommu mode - it was introduced in
>>> 3.1x days. However, in order for that to work, kernel also has to be
>>> built with this mode enabled. My guess is, whoever is the supplier
>>> of your kernel, did not do that. You should double-check the kernel
>>> configuration of your distribution.
>>>
>>> However, if you have vIOMMU in QEMU, you shouldn't need noiommu mode
>>> - "regular" vfio should work fine. noiommu mode should only be
>>> needed if you know you don't have IOMMU enabled in your kernel, and
>>> even if you can't enable it, you can still use igb_uio.
>>>
>>> Hi Anatoly,
>>>
>>> Do you suggest I take this discussion to kvm/qemu mailing list as I am
>>> not sure which component has the issue? I check dmesg for BIOS physical
>>> memory map and address reported as fault by DMAR is reported by BIOS as
>>> usable on both host and vm.
>>>
>>> [ 4539.597737] DMAR: [DMA Read] Request device [04:10.0] fault addr
>>> *33a128000 *[fault reason 06] PTE Read access is not set
>>>
>>> dmesg | grep BIOS
>>> [0.00] e820: BIOS-provided physical RAM map:
>>> [0.00] BIOS-e820: [mem 0x-0x0009afff]
>>> usable
>>> [0.00] BIOS-e820: [mem 0x0009b000-0x0009]
>>> reserved
>>> [0.00] BIOS-e820: [mem 0x000e-0x000f]
>>> reserved
>>> [0.00] BIOS-e820: [mem 0x0010-0x7938afff]
>>> usable
>>> [0.00] BIOS-e820: [mem 0x7938b000-0x7994bfff]
>>> reserved
>>> [0.00] BIOS-e820: [mem 0x7994c000-0x7999cfff]
>>> ACPI data
>>> [0.00] BIOS-e820: [mem 0x7999d000-0x79f7dfff]
>>> ACPI NVS
>>> [0.00] BIOS-e820: [mem 0x79f7e000-0x7bd37fff]
>>> reserved
>>> [0.00] BIOS-e820: [mem 0x7bd38000-0x7bd38fff]
>>> usable
>>> [0.00] BIOS-e820: [mem 0x7bd39000-0x7bdbefff]
>>> reserved
>>> [0.00] BIOS-e820: [mem 0x7bdbf000-0x7bff]
>>> usable
>>> [0.00] BIOS-e820: [mem 0x7c00-0x8fff]
>>> reserved
>>> [0.00] BIOS-e820: [mem 0xfed1c000-0xfed44fff]
>>> reserved
>>> [0.00] BIOS-e820: [mem 0xff00-0x]
>>> reserved
>>> [*0.00] BIOS-e820: [mem 0x0001-0x00407fff]
>>> usable*
>>> *
>>> *
>>> Kindly let me know your inputs.
>>>
>>> Thanks.
>>>
>>>
>>> -- Thanks,
>>> Anatoly
>>>
>>>
>>>
>> The "PTE Read not set" error usually indicates that you are trying to use
>> a non-IOMMU method when you have IOMMU enabled (i.e. trying to use igb_uio
>> when IOMMU is on). That, to me, indicates that you do have IOMMU emulation
>> enabled.
>>
>> I would go about it this way.
>>
>> First, i'd ensure that your VM has IOMMU emulation enabled and working.
>> You have mentioned that your QEMU version should have IOMMU emulation, so
>> let's assume that's the case.
>>
>> I am not sure of the exact command-line needed to activate the vIOMMU
>> emulation, but assuming your VM emulates an Intel processor, your kernel
>> command-line should have "iommu=on intel_iommu=on" in it. Check
>> /etc/default/grub for GRUB_CMDLINE_LINUX_DEFAULT value, and if the above
>> values are not in there, add the above changes, do "update-grub" and reboot
>> your VM.
>>
>> If it already did have the necessary kernel configuration, do "dmesg |
>> grep IOMMU" and look for "IOMMU Enabled". That should tell you that IOMMU
>> is enabled and working in the kernel.
>>
>> After that, you can modprobe vfio and vfio-pci, bind NICs to it, and it
>> should be working. Please bear i

[dpdk-dev] [PATCH] net/null:Different mac address support

2018-02-02 Thread Mallesh Koujalagi
After attaching two null devices to OVS, both devices show the MAC address
"00.00.00.00.00.00". Fix this by assigning a different MAC address to each
null device.

Signed-off-by: Mallesh Koujalagi 
---
 drivers/net/null/rte_eth_null.c | 23 +--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 9385ffd..98ac115 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -85,8 +85,17 @@ struct pmd_internals {
uint8_t rss_key[40];/**< 40-byte hash key. */
 };
 
+static struct ether_addr base_eth_addr = {
+   .addr_bytes = {
+   0x4E /* N */,
+   0x55 /* U */,
+   0x4C /* L */,
+   0x4C /* L */,
+   0x00,
+   0x00
+   }
+};
 
-static struct ether_addr eth_addr = { .addr_bytes = {0} };
 static struct rte_eth_link pmd_link = {
.link_speed = ETH_SPEED_NUM_10G,
.link_duplex = ETH_LINK_FULL_DUPLEX,
@@ -492,6 +501,7 @@ eth_dev_null_create(struct rte_vdev_device *dev,
struct rte_eth_dev_data *data = NULL;
struct pmd_internals *internals = NULL;
struct rte_eth_dev *eth_dev = NULL;
+   struct ether_addr *eth_addr = NULL;
 
static const uint8_t default_rss_key[40] = {
0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 
0x25, 0x3D,
@@ -519,6 +529,15 @@ eth_dev_null_create(struct rte_vdev_device *dev,
rte_free(data);
return -ENOMEM;
}
+   eth_addr = rte_zmalloc_socket(rte_vdev_device_name(dev),
+   sizeof(*eth_addr), 0, dev->device.numa_node);
+   if (eth_addr == NULL) {
+   rte_eth_dev_release_port(eth_dev);
+   rte_free(data);
+   return -ENOMEM;
+   }
+   *eth_addr = base_eth_addr;
+   eth_addr->addr_bytes[5] = eth_dev->data->port_id;
 
/* now put it all together
 * - store queue data in internals,
@@ -543,7 +562,7 @@ eth_dev_null_create(struct rte_vdev_device *dev,
data->nb_rx_queues = (uint16_t)nb_rx_queues;
data->nb_tx_queues = (uint16_t)nb_tx_queues;
data->dev_link = pmd_link;
-   data->mac_addrs = &eth_addr;
+   data->mac_addrs = eth_addr;
 
eth_dev->data = data;
eth_dev->dev_ops = &ops;
-- 
2.7.4



[dpdk-dev] [PATCH] net/null: Support bulk alloc and free.

2018-02-02 Thread Mallesh Koujalagi
Bulk allocation and freeing of multiple mbufs increases throughput by more
than ~2% on a single core.

Signed-off-by: Mallesh Koujalagi 
---
 drivers/net/null/rte_eth_null.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 9385ffd..247ede0 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -130,10 +130,11 @@ eth_null_copy_rx(void *q, struct rte_mbuf **bufs, 
uint16_t nb_bufs)
return 0;
 
packet_size = h->internals->packet_size;
+
+   if (rte_pktmbuf_alloc_bulk(h->mb_pool, bufs, nb_bufs) != 0)
+   return 0;
+
for (i = 0; i < nb_bufs; i++) {
-   bufs[i] = rte_pktmbuf_alloc(h->mb_pool);
-   if (!bufs[i])
-   break;
rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *), h->dummy_packet,
packet_size);
bufs[i]->data_len = (uint16_t)packet_size;
@@ -149,18 +150,15 @@ eth_null_copy_rx(void *q, struct rte_mbuf **bufs, 
uint16_t nb_bufs)
 static uint16_t
 eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 {
-   int i;
struct null_queue *h = q;
 
if ((q == NULL) || (bufs == NULL))
return 0;
 
-   for (i = 0; i < nb_bufs; i++)
-   rte_pktmbuf_free(bufs[i]);
+   rte_mempool_put_bulk(bufs[0]->pool, (void **)bufs, nb_bufs);
+   rte_atomic64_add(&h->tx_pkts, nb_bufs);
 
-   rte_atomic64_add(&(h->tx_pkts), i);
-
-   return i;
+   return nb_bufs;
 }
 
 static uint16_t
-- 
2.7.4



[dpdk-dev] [PATCH 1/3] net/qede: fix VF vport creation sequence

2018-02-02 Thread Rasesh Mody
From: Harish Patil 

A few adjustments are required to effectively handle the VF vport create/delete
sequence. The problem is exposed by recent ethdev TX offload changes,
which require the port to be in the down state before applying TX offloads.

 - Move vport creation from dev_init() to dev_configure()
 - Force to stop vport if it was already started due to previous run
   (restart case)
 - Move link state enable/disable to dev_init() and dev_close()
   respectively.
 - For MTU change, recreate vport with new MTU value and restore old
   config. This is necessary since VF MTU value can be changed only upon
   vport creation.

Fixes: ec94dbc57362 ("qede: add base driver")
Cc: sta...@dpdk.org

Signed-off-by: Harish Patil 
---
 drivers/net/qede/qede_ethdev.c |  159 +++-
 drivers/net/qede/qede_ethdev.h |3 +-
 drivers/net/qede/qede_rxtx.c   |4 +
 3 files changed, 83 insertions(+), 83 deletions(-)

diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index 20a1c31..46e0e36 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -442,55 +442,59 @@ static void qede_reset_queue_stats(struct qede_dev *qdev, 
bool xstats)
 }
 
 static int
-qede_start_vport(struct qede_dev *qdev, uint16_t mtu)
+qede_stop_vport(struct ecore_dev *edev)
 {
-   struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-   struct ecore_sp_vport_start_params params;
struct ecore_hwfn *p_hwfn;
+   uint8_t vport_id;
int rc;
int i;
 
-   memset(&params, 0, sizeof(params));
-   params.vport_id = 0;
-   params.mtu = mtu;
-   /* @DPDK - Disable FW placement */
-   params.zero_placement_offset = 1;
+   vport_id = 0;
for_each_hwfn(edev, i) {
p_hwfn = &edev->hwfns[i];
-   params.concrete_fid = p_hwfn->hw_info.concrete_fid;
-   params.opaque_fid = p_hwfn->hw_info.opaque_fid;
-   rc = ecore_sp_vport_start(p_hwfn, &params);
+   rc = ecore_sp_vport_stop(p_hwfn, p_hwfn->hw_info.opaque_fid,
+vport_id);
if (rc != ECORE_SUCCESS) {
-   DP_ERR(edev, "Start V-PORT failed %d\n", rc);
+   DP_ERR(edev, "Stop V-PORT failed rc = %d\n", rc);
return rc;
}
}
-   ecore_reset_vport_stats(edev);
-   if (IS_PF(edev))
-   qede_reset_queue_stats(qdev, true);
-   DP_INFO(edev, "VPORT started with MTU = %u\n", mtu);
+
+   DP_INFO(edev, "vport stopped\n");
 
return 0;
 }
 
 static int
-qede_stop_vport(struct ecore_dev *edev)
+qede_start_vport(struct qede_dev *qdev, uint16_t mtu)
 {
+   struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+   struct ecore_sp_vport_start_params params;
struct ecore_hwfn *p_hwfn;
-   uint8_t vport_id;
int rc;
int i;
 
-   vport_id = 0;
+   if (qdev->vport_started)
+   qede_stop_vport(edev);
+
+   memset(&params, 0, sizeof(params));
+   params.vport_id = 0;
+   params.mtu = mtu;
+   /* @DPDK - Disable FW placement */
+   params.zero_placement_offset = 1;
for_each_hwfn(edev, i) {
p_hwfn = &edev->hwfns[i];
-   rc = ecore_sp_vport_stop(p_hwfn, p_hwfn->hw_info.opaque_fid,
-vport_id);
+   params.concrete_fid = p_hwfn->hw_info.concrete_fid;
+   params.opaque_fid = p_hwfn->hw_info.opaque_fid;
+   rc = ecore_sp_vport_start(p_hwfn, &params);
if (rc != ECORE_SUCCESS) {
-   DP_ERR(edev, "Stop V-PORT failed rc = %d\n", rc);
+   DP_ERR(edev, "Start V-PORT failed %d\n", rc);
return rc;
}
}
+   ecore_reset_vport_stats(edev);
+   qdev->vport_started = true;
+   DP_INFO(edev, "VPORT started with MTU = %u\n", mtu);
 
return 0;
 }
@@ -1194,6 +1198,8 @@ static int qede_vlan_offload_set(struct rte_eth_dev 
*eth_dev, int mask)
DP_INFO(edev, "No offloads are supported with VLAN Q-in-Q"
" and classification is based on outer tag only\n");
 
+   qdev->vlan_offload_mask = mask;
+
DP_INFO(edev, "vlan offload mask %d vlan-strip %d vlan-filter %d\n",
mask, rxmode->hw_vlan_strip, rxmode->hw_vlan_filter);
 
@@ -1267,13 +1273,6 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev)
 
PMD_INIT_FUNC_TRACE(edev);
 
-   /* Update MTU only if it has changed */
-   if (qdev->mtu != qdev->new_mtu) {
-   if (qede_update_mtu(eth_dev, qdev->new_mtu))
-   goto err;
-   qdev->mtu = qdev->new_mtu;
-   }
-
/* Configure TPA parameters */
if (rxmode->enable_lro) {
if (qede_enable_tpa(eth_dev, true))
@@ -1287,6 +1286,9 @@ static int qede_dev_start(struct rte_eth_dev *et

[dpdk-dev] [PATCH 3/3] doc: qede: align dynamic log names with standard

2018-02-02 Thread Rasesh Mody
Signed-off-by: Rasesh Mody 
---
 doc/guides/nics/qede.rst |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/guides/nics/qede.rst b/doc/guides/nics/qede.rst
index 0ca5084..63ce9b4 100644
--- a/doc/guides/nics/qede.rst
+++ b/doc/guides/nics/qede.rst
@@ -193,7 +193,7 @@ This section provides instructions to configure SR-IOV with 
Linux OS.
 
 
 #. Running testpmd
-   (Supply ``--log-level="pmd.qede.driver",7`` to view informational messages):
+   (Supply ``--log-level="pmd.net.qede.driver",7`` to view informational 
messages):
 
Refer to the document
:ref:`compiling and testing a PMD for a NIC ` to run
-- 
1.7.10.3



[dpdk-dev] [PATCH 2/3] net/qede: fix few log messages

2018-02-02 Thread Rasesh Mody
Fixes: 9e334305178f ("net/qede: fix MTU set and max Rx length")
Fixes: 22d07d939c3c ("net/qede/base: update")
Cc: sta...@dpdk.org

Signed-off-by: Rasesh Mody 
---
 drivers/net/qede/base/ecore_dcbx.c |7 +++
 drivers/net/qede/qede_rxtx.c   |2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/qede/base/ecore_dcbx.c 
b/drivers/net/qede/base/ecore_dcbx.c
index 632297a..21ddda9 100644
--- a/drivers/net/qede/base/ecore_dcbx.c
+++ b/drivers/net/qede/base/ecore_dcbx.c
@@ -216,10 +216,9 @@ u8 ecore_dcbx_get_dscp_value(struct ecore_hwfn *p_hwfn, u8 
pri)
*type = DCBX_PROTOCOL_ETH;
} else {
*type = DCBX_MAX_PROTOCOL_TYPE;
-   DP_ERR(p_hwfn,
-  "No action required, App TLV id = 0x%x"
-  " app_prio_bitmap = 0x%x\n",
-  id, app_prio_bitmap);
+   DP_VERBOSE(p_hwfn, ECORE_MSG_DCB,
+   "No action required, App TLV entry = 0x%x\n",
+  app_prio_bitmap);
return false;
}
 
diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index 169ede8..0de7c6b 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -158,7 +158,7 @@ static inline int qede_alloc_rx_buffer(struct qede_rx_queue 
*rxq)
qdev->fp_array[queue_idx].rxq = rxq;
 
DP_INFO(edev, "rxq %d num_desc %u rx_buf_size=%u socket %u\n",
- queue_idx, nb_desc, qdev->mtu, socket_id);
+ queue_idx, nb_desc, rxq->rx_buf_size, socket_id);
 
return 0;
 }
-- 
1.7.10.3