[dpdk-dev] [PATCH v2] fix drop action seg fault missing compilation flag

2017-06-25 Thread Shachar Beiser

Shachar Beiser (1):
  net/mlx5: fix drop action seg fault

 drivers/net/mlx5/mlx5_flow.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH v2] net/mlx5: fix drop action seg fault

2017-06-25 Thread Shachar Beiser
Missing room in flow allocation to store the drop specification.
Changing flow without storing the change in rte_flow.
Fixes: 88c77dedfbb0 ("net/mlx5: implement drop action in hardware classifier")

Signed-off-by: Shachar Beiser 
---
 drivers/net/mlx5/mlx5_flow.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 12893c6..86be929 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -580,6 +580,10 @@ struct mlx5_flow_action {
}
if (action->mark && !flow->ibv_attr && !action->drop)
flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
+#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
+   if (!flow->ibv_attr && action->drop)
+   flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
+#endif
if (!action->queue && !action->drop) {
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
   NULL, "no valid action");
@@ -1011,9 +1015,6 @@ struct mlx5_flow_action {
return NULL;
}
rte_flow->drop = 1;
-   rte_flow->ibv_attr = flow->ibv_attr;
-   if (!priv->started)
-   return rte_flow;
 #ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
*drop = (struct ibv_exp_flow_spec_action_drop){
@@ -1023,6 +1024,9 @@ struct mlx5_flow_action {
++flow->ibv_attr->num_of_specs;
flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
 #endif
+   rte_flow->ibv_attr = flow->ibv_attr;
+   if (!priv->started)
+   return rte_flow;
rte_flow->qp = priv->flow_drop_queue->qp;
rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
 rte_flow->ibv_attr);
-- 
1.8.3.1



[dpdk-dev] [PATCH v2] Adding compilation flag for fix drop action seg fault

2017-06-25 Thread Shachar Beiser
Shachar Beiser (1):
  net/mlx5: fix drop action seg fault

 drivers/net/mlx5/mlx5_flow.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH v2] net/mlx5: fix drop action seg fault

2017-06-25 Thread Shachar Beiser
Missing room in flow allocation to store the drop specification.
Changing flow without storing the change in rte_flow.
Fixes: 88c77dedfbb0 ("net/mlx5: implement drop action in hardware classifier")

Signed-off-by: Shachar Beiser 
---
 drivers/net/mlx5/mlx5_flow.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 12893c6..86be929 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -580,6 +580,10 @@ struct mlx5_flow_action {
}
if (action->mark && !flow->ibv_attr && !action->drop)
flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
+#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
+   if (!flow->ibv_attr && action->drop)
+   flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
+#endif
if (!action->queue && !action->drop) {
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
   NULL, "no valid action");
@@ -1011,9 +1015,6 @@ struct mlx5_flow_action {
return NULL;
}
rte_flow->drop = 1;
-   rte_flow->ibv_attr = flow->ibv_attr;
-   if (!priv->started)
-   return rte_flow;
 #ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
*drop = (struct ibv_exp_flow_spec_action_drop){
@@ -1023,6 +1024,9 @@ struct mlx5_flow_action {
++flow->ibv_attr->num_of_specs;
flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
 #endif
+   rte_flow->ibv_attr = flow->ibv_attr;
+   if (!priv->started)
+   return rte_flow;
rte_flow->qp = priv->flow_drop_queue->qp;
rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
 rte_flow->ibv_attr);
-- 
1.8.3.1



Re: [dpdk-dev] [PATCH v6 0/3] Support TCP/IPv4 GRO in DPDK

2017-06-25 Thread Tan, Jianfeng



On 6/23/2017 10:43 PM, Jiayu Hu wrote:

Generic Receive Offload (GRO) is a widely used SW-based offloading
technique to reduce per-packet processing overhead. It gains performance
by reassembling small packets into large ones. Therefore, we propose to
support GRO in DPDK.

To enable more flexibility to applications, DPDK GRO is implemented as
a user library. Applications explicitly use the GRO library to merge
small packets into large ones. DPDK GRO provides two reassembly modes:
lightweight mode and heavyweight mode. If applications want to merge
packets in a simple way, they can select the lightweight mode API. If
applications need more fine-grained control, they can select the
heavyweight mode API.

This patchset is to support TCP/IPv4 GRO in DPDK. The first patch is to
provide a GRO API framework. The second patch is to support TCP/IPv4 GRO.
The last patch is to enable TCP/IPv4 GRO in testpmd.

We perform many iperf tests to see the performance gains from DPDK GRO.

The test environment is:
a. two 25Gbps physical ports (p0 and p1) are linked together. Assign p0
to one networking namespace and assign p1 to DPDK;
b. enable TSO for p0. Run iperf client on p0;
c. launch testpmd with p1 and a vhost-user port, and run it in csum
forwarding mode. Select TCP HW checksum calculation for the
vhost-user port in csum forwarding engine. And for better
performance, we select IPv4 and TCP HW checksum calculation for p1
too;
d. launch a VM with one CPU core and a virtio-net port. The VM OS is
ubuntu 16.04 whose virtio-net driver supports GRO. Enables RX csum
offloading and mrg_rxbuf for the VM. Iperf server runs in the VM;
e. to run iperf tests, we need to prevent the csum forwarding engine
from always changing packet MAC addresses. So in our tests, we
comment this code out (line701 ~ line704 in csumonly.c).

In each test, we run iperf with the following three configurations:
- single flow and single TCP stream
- multiple flows and single TCP stream
- single flow and parallel TCP streams


To me, flow == TCP stream; so could you explain what "flow" means here?



We run the above iperf tests in three scenarios:
s1: disabling kernel GRO and enabling DPDK GRO
s2: disabling kernel GRO and disabling DPDK GRO
s3: enabling kernel GRO and disabling DPDK GRO
Comparing the throughput of s1 with s2, we can see the performance gains
from DPDK GRO. Comparing the throughput of s1 and s3, we can compare DPDK
GRO performance with kernel GRO performance.

Test results:
- DPDK GRO throughput is almost 2 times the throughput with neither
DPDK GRO nor kernel GRO;
- DPDK GRO throughput is almost 1.2 times the throughput of
kernel GRO.

Change log
==
v6:
- avoid checksum validation and calculation
- enable to process IP fragmented packets
- add a command in testpmd
- update documents
- modify rte_gro_timeout_flush and rte_gro_reassemble_burst
- rename variable name
v5:
- fix some bugs
- fix coding style issues
v4:
- implement DPDK GRO as an application-used library
- introduce lightweight and heavyweight working modes to enable
fine-grained controls to applications
- replace cuckoo hash tables with simpler table structure
v3:
- fix compilation issues.
v2:
- provide generic reassembly function;
- implement GRO as a device ability:
add APIs for devices to support GRO;
add APIs for applications to enable/disable GRO;
- update testpmd example.

Jiayu Hu (3):
   lib: add Generic Receive Offload API framework
   lib/gro: add TCP/IPv4 GRO support
   app/testpmd: enable TCP/IPv4 GRO

  app/test-pmd/cmdline.c  | 125 +
  app/test-pmd/config.c   |  37 +++
  app/test-pmd/csumonly.c |   5 +
  app/test-pmd/testpmd.c  |   3 +
  app/test-pmd/testpmd.h  |  11 +
  config/common_base  |   5 +
  doc/guides/rel_notes/release_17_08.rst  |   7 +
  doc/guides/testpmd_app_ug/testpmd_funcs.rst |  34 +++
  lib/Makefile|   2 +
  lib/librte_gro/Makefile |  51 
  lib/librte_gro/rte_gro.c| 221 
  lib/librte_gro/rte_gro.h| 195 ++
  lib/librte_gro/rte_gro_tcp.c| 393 
  lib/librte_gro/rte_gro_tcp.h| 188 +
  lib/librte_gro/rte_gro_version.map  |  12 +
  mk/rte.app.mk   |   1 +
  16 files changed, 1290 insertions(+)
  create mode 100644 lib/librte_gro/Makefile
  create mode 100644 lib/librte_gro/rte_gro.c
  create mode 100644 lib/librte_gro/rte_gro.h
  create mode 100644 lib/librte_gro/rte_gro_tcp.c
  create mode 100644 lib/librte_gro/rte_gro_tcp.h
  create mode 100644 lib/librte_gro/rte_gro_version.map





Re: [dpdk-dev] [PATCH v6 2/3] lib/gro: add TCP/IPv4 GRO support

2017-06-25 Thread Tan, Jianfeng

Hi Jiayu,


On 6/23/2017 10:43 PM, Jiayu Hu wrote:

In this patch, we introduce five APIs to support TCP/IPv4 GRO.
- gro_tcp_tbl_create: create a TCP reassembly table, which is used to
 merge packets.
- gro_tcp_tbl_destroy: free memory space of a TCP reassembly table.
- gro_tcp_tbl_flush: flush all packets from a TCP reassembly table.
- gro_tcp_tbl_timeout_flush: flush timeout packets from a TCP
 reassembly table.
- gro_tcp4_reassemble: reassemble an inputted TCP/IPv4 packet.

TCP/IPv4 GRO API assumes all inputted packets are with correct IPv4
and TCP checksums. And TCP/IPv4 GRO API doesn't update IPv4 and TCP
checksums for merged packets. If inputted packets are IP fragmented,
TCP/IPv4 GRO API assumes they are complete packets (i.e. with L4
headers).

In TCP GRO, we use a table structure, called TCP reassembly table, to
reassemble packets. Both TCP/IPv4 and TCP/IPv6 GRO use the same table
structure. A TCP reassembly table includes a key array and an item array,
where the key array keeps the criteria to merge packets and the item
array keeps packet information.

One key in the key array points to an item group, which consists of
packets which have the same criteria value. If two packets are able to
merge, they must be in the same item group. Each key in the key array
includes two parts:
- criteria: the criteria of merging packets. If two packets can be
 merged, they must have the same criteria value.
- start_index: the index of the first incoming packet of the item group.

Each element in the item array keeps the information of one packet. It
mainly includes two parts:
- pkt: packet address
- next_pkt_index: the index of the next packet in the same item group.
 All packets in the same item group are chained by next_pkt_index.
 With next_pkt_index, we can locate all packets in the same item
 group one by one.

To process an incoming packet needs three steps:
a. check if the packet should be processed. Packets with the following
 properties won't be processed:
- packets without data (e.g. SYN, SYN-ACK)
b. traverse the key array to find a key which has the same criteria
 value with the incoming packet. If find, goto step c. Otherwise,
 insert a new key and insert the packet into the item array.
c. locate the first packet in the item group via the start_index in the
 key. Then traverse all packets in the item group via next_pkt_index.
 If find one packet which can merge with the incoming one, merge them
 together. If can't find, insert the packet into this item group.

Signed-off-by: Jiayu Hu 
---
  doc/guides/rel_notes/release_17_08.rst |   7 +
  lib/librte_gro/Makefile|   1 +
  lib/librte_gro/rte_gro.c   | 126 +--
  lib/librte_gro/rte_gro.h   |   6 +-
  lib/librte_gro/rte_gro_tcp.c   | 393 +
  lib/librte_gro/rte_gro_tcp.h   | 188 
  6 files changed, 705 insertions(+), 16 deletions(-)
  create mode 100644 lib/librte_gro/rte_gro_tcp.c
  create mode 100644 lib/librte_gro/rte_gro_tcp.h

diff --git a/doc/guides/rel_notes/release_17_08.rst 
b/doc/guides/rel_notes/release_17_08.rst
index 842f46f..f067247 100644
--- a/doc/guides/rel_notes/release_17_08.rst
+++ b/doc/guides/rel_notes/release_17_08.rst
@@ -75,6 +75,13 @@ New Features
  
Added support for firmwares with multiple Ethernet ports per physical port.
  
+* **Add Generic Receive Offload API support.**

+
+  Generic Receive Offload (GRO) API supports to reassemble TCP/IPv4
+  packets. GRO API assumes all inputted packets are with correct
+  checksums. GRO API doesn't update checksums for merged packets. If
+  inputted packets are IP fragmented, GRO API assumes they are complete
+  packets (i.e. with L4 headers).
  
  Resolved Issues

  ---
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
index 7e0f128..e89344d 100644
--- a/lib/librte_gro/Makefile
+++ b/lib/librte_gro/Makefile
@@ -43,6 +43,7 @@ LIBABIVER := 1
  
  # source files

  SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro_tcp.c
  
  # install this header file

  SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index ebc545f..ae800f9 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -32,11 +32,15 @@
  
  #include 

  #include 
+#include 
  
  #include "rte_gro.h"

+#include "rte_gro_tcp.h"
  
-static gro_tbl_create_fn tbl_create_functions[GRO_TYPE_MAX_NB];

-static gro_tbl_destroy_fn tbl_destroy_functions[GRO_TYPE_MAX_NB];
+static gro_tbl_create_fn tbl_create_functions[GRO_TYPE_MAX_NB] = {
+   gro_tcp_tbl_create, NULL};
+static gro_tbl_destroy_fn tbl_destroy_functions[GRO_TYPE_MAX_NB] = {
+   gro_tcp_tbl_destroy, NULL};
  
  struct rte_gro_tbl *rte_gro_tbl_create(uint16_t socket_id,

uint16_t max_flow_num,
@@ -94,32 +98,124 @@ void rte_gro_tbl_destroy(struct r

Re: [dpdk-dev] [PATCH v6 1/3] lib: add Generic Receive Offload API framework

2017-06-25 Thread Tan, Jianfeng

Hi Jiayu,


On 6/23/2017 10:43 PM, Jiayu Hu wrote:

Generic Receive Offload (GRO) is a widely used SW-based offloading
technique to reduce per-packet processing overhead. It gains
performance by reassembling small packets into large ones. This
patchset is to support GRO in DPDK. To support GRO, this patch
implements a GRO API framework.

To enable more flexibility to applications, DPDK GRO is implemented as
a user library. Applications explicitly use the GRO library to merge
small packets into large ones. DPDK GRO provides two reassembly modes.
One is called lightweight mode, the other is called heavyweight mode.
If applications want to merge packets in a simple way, they can use
lightweight mode. If applications need more fine-grained control,
they can choose heavyweight mode.

rte_gro_reassemble_burst is the main reassembly API which is used in
lightweight mode and processes N packets at a time. For applications,
performing GRO in lightweight mode is simple. They just need to invoke
rte_gro_reassemble_burst. Applications can get GROed packets as soon as
rte_gro_reassemble_burst returns.

rte_gro_reassemble is the main reassembly API which is used in
heavyweight mode and processes one packet at a time. For applications,
performing GRO in heavyweight mode is relatively complicated. Before
performing GRO, applications need to create a GRO table by
rte_gro_tbl_create. Then they can use rte_gro_reassemble to process
packets one by one. The processed packets are in the GRO table. If
applications want to get them, applications need to manually flush
them by flush APIs.

Signed-off-by: Jiayu Hu 
---
  config/common_base |   5 +
  lib/Makefile   |   2 +
  lib/librte_gro/Makefile|  50 ++
  lib/librte_gro/rte_gro.c   | 125 
  lib/librte_gro/rte_gro.h   | 191 +
  lib/librte_gro/rte_gro_version.map |  12 +++
  mk/rte.app.mk  |   1 +
  7 files changed, 386 insertions(+)
  create mode 100644 lib/librte_gro/Makefile
  create mode 100644 lib/librte_gro/rte_gro.c
  create mode 100644 lib/librte_gro/rte_gro.h
  create mode 100644 lib/librte_gro/rte_gro_version.map

diff --git a/config/common_base b/config/common_base
index f6aafd1..167f5ef 100644
--- a/config/common_base
+++ b/config/common_base
@@ -712,6 +712,11 @@ CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
  CONFIG_RTE_LIBRTE_PMD_VHOST=n
  
  #

+# Compile GRO library
+#
+CONFIG_RTE_LIBRTE_GRO=y
+
+#
  #Compile Xen domain0 support
  #
  CONFIG_RTE_LIBRTE_XEN_DOM0=n
diff --git a/lib/Makefile b/lib/Makefile
index 07e1fd0..ac1c2f6 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -106,6 +106,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
  DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf
  DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
  DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether
+DIRS-$(CONFIG_RTE_LIBRTE_GRO) += librte_gro
+DEPDIRS-librte_gro := librte_eal librte_mbuf librte_ether librte_net
  
  ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)

  DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
new file mode 100644
index 000..7e0f128
--- /dev/null
+++ b/lib/librte_gro/Makefile
@@ -0,0 +1,50 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2017 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY O

[dpdk-dev] [PATCH v5 01/12] bus: add bus iterator to find a bus

2017-06-25 Thread Gaetan Rivet
From: Jan Blunck 

This helper iterates over all registered buses to find one
matching the data passed as parameter.

Signed-off-by: Jan Blunck 
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  1 +
 lib/librte_eal/common/eal_common_bus.c  | 20 
 lib/librte_eal/common/include/rte_bus.h | 43 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
 4 files changed, 65 insertions(+)

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 2e48a73..ed09ab2 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -162,6 +162,7 @@ DPDK_17.02 {
 DPDK_17.05 {
global:
 
+   rte_bus_find;
rte_cpu_is_supported;
rte_log_dump;
rte_log_register;
diff --git a/lib/librte_eal/common/eal_common_bus.c 
b/lib/librte_eal/common/eal_common_bus.c
index 8f9baf8..4619eb2 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -145,3 +145,23 @@ rte_bus_dump(FILE *f)
}
}
 }
+
+struct rte_bus *
+rte_bus_find(rte_bus_cmp_t cmp,
+const void *data,
+const struct rte_bus *start)
+{
+   struct rte_bus *bus = NULL;
+   int started = start == NULL;
+
+   TAILQ_FOREACH(bus, &rte_bus_list, next) {
+   if (!started) {
+   if (bus == start)
+   started = 1;
+   continue;
+   }
+   if (cmp(bus, data) == 0)
+   break;
+   }
+   return bus;
+}
diff --git a/lib/librte_eal/common/include/rte_bus.h 
b/lib/librte_eal/common/include/rte_bus.h
index 5f47b82..ecf839b 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -141,6 +141,49 @@ int rte_bus_probe(void);
 void rte_bus_dump(FILE *f);
 
 /**
+ * Bus comparison function.
+ *
+ * @param bus
+ * Bus under test.
+ *
+ * @param data
+ * Data to compare against.
+ *
+ * @return
+ * 0 if the bus matches the data.
+ * !0 if the bus does not match.
+ * <0 if ordering is possible and the bus is lower than the data.
+ * >0 if ordering is possible and the bus is greater than the data.
+ */
+typedef int (*rte_bus_cmp_t)(const struct rte_bus *bus, const void *data);
+
+/**
+ * Bus iterator to find a particular bus.
+ *
+ * This function compares each registered bus to find one that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero this function will stop iterating
+ * over any more buses. To continue a search the bus of a previous search can
+ * be passed via the start parameter.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ *  Data to pass to comparison function.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @return
+ *  A pointer to a rte_bus structure or NULL in case no bus matches
+ */
+struct rte_bus *rte_bus_find(rte_bus_cmp_t cmp,
+const void *data,
+const struct rte_bus *start);
+
+/**
  * Helper for Bus registration.
  * The constructor has higher priority than PMD constructors.
  */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map 
b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 670bab3..6efa517 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -166,6 +166,7 @@ DPDK_17.02 {
 DPDK_17.05 {
global:
 
+   rte_bus_find;
rte_cpu_is_supported;
rte_intr_free_epoll_fd;
rte_log_dump;
-- 
2.1.4



[dpdk-dev] [PATCH v5 00/12] bus: attach / detach API

2017-06-25 Thread Gaetan Rivet
Following the work from Jan:

This patchset introduces the attach / detach API to rte_bus.
The rte_device structure is used as the generic device representation.

This API is implemented for the virtual bus.
The functions rte_eal_dev_attach and rte_eal_dev_detach are updated to
use this new interface.

-- v2

0. API rework
-

I would like to propose an evolution on the API developed by Jan.

The attach / detach rte_bus API is necessary to support the attach/detach
rte_dev API. Those are two different levels for one similar functionality.

Attach / detach does not allow true hotplugging, because the attach
function expects the devices operated upon to already exist within the
buses / sub-layers. This means that this API expects device metadata
(bus-internal device representation and associated device information
read from the system) to be present upon attach. This part of the work
is done during scanning.

While it is best to avoid changing the public rte_dev API as it already
exists, nothing prevents this new rte_bus API from superseding it.
It has been said during the previous release cycle that device hotplug
was a feature that interested users. True hotplug is not allowed by the
current attach / detach API. Worse, this API hinders the effort to bring
this new functionality by squatting its semantic field.

Thus, I propose to rename rte_bus attach / detach; plug / unplug. As it
is a superset of the attach / detach functionality, it can be used to
implement rte_dev attach / detach. Now is the right time to pivot to
this new feature.

This should help maintainers understand the aim of this API and the
differences with the APIs higher-up, clarify the field and allow a new
functionality to be proposed.

The vdev bus is inherently supporting the new API, however it has been
made explicit. My implementation in the PCI bus in further patchset also
follows the rte_bus hotplug API instead of only attach / detach.

One remaining problem with the vdev bus is the rte_dev attach
implementation, which needs the rte_devargs rework to be properly fixed.

1. Additional evolutions in the patchset


The RTE_VERIFY on the find_device is too stringent I think and forces
all buses to implement a public device iterator. While it could be
argued that it would push for quicker support for the functionality, I
think it's possible that some buses are not interested at all in it and
should simply be ignored.

The bus devices iterator has been fixed.

The internal rte_device handle was not properly setup within the
net_ring PMD.

-- v3

The new API is now

typedef struct rte_device * (*rte_bus_plug_t)(struct rte_devargs *da);
typedef int (*rte_bus_unplug_t)(struct rte_device *dev);

So, plugging a device takes an rte_devargs as input and returns an rte_device.
While implementing related subsystems, I found that I usually needed
this rte_device handle upon a successful device plugging. This seems the
sensible and useful thing to do.
As such, on error NULL is returned and rte_errno is set by the bus.

Unplugging a device however now returns to the first version, which used
an rte_device. The explicit contract here is that if one has an
rte_device that has been obtained by calling bus->plug(), then this
handle can be used for bus->unplug().

Additionally, bus and device comparators now return 0 on match,
following strcmp-like behavior.

-- v4

* rte_bus_find now takes a *start* parameter, that can be null.
  The bus search starts from this element if set.

* A few doc fixes.

* The rte_device field was fixed within the rte_ring PMD in a previous patch.
  This fix has been integrated by other means, it is not necessary anymore.

-- v5

* The commit
ethdev: use embedded rte_device to detach driver
  has been removed from this series to be sent separately.

* The PCI support for device access and hotplug is merged in this series
  instead of being proposed as a separate patchset.

* A few nitpicks to the code itself have been fixed.

* Some documentation has been reworked.

Gaetan Rivet (5):
  vdev: implement hotplug functionality
  vdev: expose bus name
  vdev: use standard bus registration function
  pci: implement find_device bus operation
  pci: implement hotplug bus operation

Jan Blunck (7):
  bus: add bus iterator to find a bus
  bus: add device iterator method
  bus: add helper to find which bus holds a device
  bus: add bus iterator to find a device
  bus: introduce hotplug functionality
  vdev: implement find_device bus operation
  eal: make virtual driver probe and remove take rte_vdev_device

 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +
 lib/librte_eal/common/eal_common_bus.c  |  71 ++
 lib/librte_eal/common/eal_common_dev.c  |  93 +-
 lib/librte_eal/common/eal_common_pci.c  |  57 +++
 lib/librte_eal/common/eal_common_vdev.c |  65 ++---
 lib/librte_eal/common/include/rte_bus.h

[dpdk-dev] [PATCH v5 02/12] bus: add device iterator method

2017-06-25 Thread Gaetan Rivet
From: Jan Blunck 

Signed-off-by: Jan Blunck 
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/include/rte_bus.h | 19 +++
 lib/librte_eal/common/include/rte_dev.h | 21 +
 2 files changed, 40 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_bus.h 
b/lib/librte_eal/common/include/rte_bus.h
index ecf839b..5efb76e 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -82,6 +82,24 @@ typedef int (*rte_bus_scan_t)(void);
 typedef int (*rte_bus_probe_t)(void);
 
 /**
+ * Device iterator to find a device on a bus.
+ *
+ * This function returns an rte_device if one of those held by the bus
+ * matches the data passed as parameter.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to compare each device against.
+ *
+ * @return
+ * The first device matching the data, NULL if none exists.
+ */
+typedef struct rte_device * (*rte_bus_find_device_t)(rte_dev_cmp_t cmp,
+const void *data);
+
+/**
  * A structure describing a generic bus.
  */
 struct rte_bus {
@@ -89,6 +107,7 @@ struct rte_bus {
const char *name;/**< Name of the bus */
rte_bus_scan_t scan; /**< Scan for devices attached to bus */
rte_bus_probe_t probe;   /**< Probe devices on bus */
+   rte_bus_find_device_t find_device; /**< Find a device on bus */
 };
 
 /**
diff --git a/lib/librte_eal/common/include/rte_dev.h 
b/lib/librte_eal/common/include/rte_dev.h
index de20c06..04d9c28 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -191,6 +191,27 @@ int rte_eal_dev_attach(const char *name, const char 
*devargs);
  */
 int rte_eal_dev_detach(const char *name);
 
+/**
+ * Device comparison function.
+ *
+ * This type of function is used to compare an rte_device with arbitrary
+ * data.
+ *
+ * @param dev
+ *   Device handle.
+ *
+ * @param data
+ *   Data to compare against. The type of this parameter is determined by
+ *   the kind of comparison performed by the function.
+ *
+ * @return
+ *   0 if the device matches the data.
+ *   !0 if the device does not match.
+ *   <0 if ordering is possible and the device is lower than the data.
+ *   >0 if ordering is possible and the device is greater than the data.
+ */
+typedef int (*rte_dev_cmp_t)(const struct rte_device *dev, const void *data);
+
 #define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[]
 
 #define RTE_PMD_EXPORT_NAME(name, idx) \
-- 
2.1.4



[dpdk-dev] [PATCH v5 03/12] bus: add helper to find which bus holds a device

2017-06-25 Thread Gaetan Rivet
From: Jan Blunck 

Signed-off-by: Jan Blunck 
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  1 +
 lib/librte_eal/common/eal_common_bus.c  | 25 +
 lib/librte_eal/common/include/rte_bus.h |  5 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
 4 files changed, 32 insertions(+)

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index ed09ab2..f1a0765 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -163,6 +163,7 @@ DPDK_17.05 {
global:
 
rte_bus_find;
+   rte_bus_find_by_device;
rte_cpu_is_supported;
rte_log_dump;
rte_log_register;
diff --git a/lib/librte_eal/common/eal_common_bus.c 
b/lib/librte_eal/common/eal_common_bus.c
index 4619eb2..d208214 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -165,3 +165,28 @@ rte_bus_find(rte_bus_cmp_t cmp,
}
return bus;
 }
+
+static int
+cmp_rte_device(const struct rte_device *dev1, const void *_dev2)
+{
+   const struct rte_device *dev2 = _dev2;
+
+   return dev1 != dev2;
+}
+
+static int
+bus_find_device(const struct rte_bus *bus, const void *_dev)
+{
+   struct rte_device *dev;
+
+   if (bus->find_device == NULL)
+   return -1;
+   dev = bus->find_device(cmp_rte_device, _dev);
+   return dev == NULL;
+}
+
+struct rte_bus *
+rte_bus_find_by_device(const struct rte_device *dev)
+{
+   return rte_bus_find(bus_find_device, (const void *)dev, NULL);
+}
diff --git a/lib/librte_eal/common/include/rte_bus.h 
b/lib/librte_eal/common/include/rte_bus.h
index 5efb76e..5441af9 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -203,6 +203,11 @@ struct rte_bus *rte_bus_find(rte_bus_cmp_t cmp,
 const struct rte_bus *start);
 
 /**
+ * Find the registered bus for a particular device.
+ */
+struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev);
+
+/**
  * Helper for Bus registration.
  * The constructor has higher priority than PMD constructors.
  */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map 
b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 6efa517..6f77222 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -167,6 +167,7 @@ DPDK_17.05 {
global:
 
rte_bus_find;
+   rte_bus_find_by_device;
rte_cpu_is_supported;
rte_intr_free_epoll_fd;
rte_log_dump;
-- 
2.1.4



[dpdk-dev] [PATCH v5 04/12] bus: add bus iterator to find a device

2017-06-25 Thread Gaetan Rivet
From: Jan Blunck 

Signed-off-by: Jan Blunck 
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |  1 +
 lib/librte_eal/common/eal_common_bus.c  | 24 +++
 lib/librte_eal/common/include/rte_bus.h | 26 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |  1 +
 4 files changed, 52 insertions(+)

diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map 
b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index f1a0765..21640d6 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -164,6 +164,7 @@ DPDK_17.05 {
 
rte_bus_find;
rte_bus_find_by_device;
+   rte_bus_find_device;
rte_cpu_is_supported;
rte_log_dump;
rte_log_register;
diff --git a/lib/librte_eal/common/eal_common_bus.c 
b/lib/librte_eal/common/eal_common_bus.c
index d208214..63fd9f1 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -190,3 +190,27 @@ rte_bus_find_by_device(const struct rte_device *dev)
 {
return rte_bus_find(bus_find_device, (const void *)dev, NULL);
 }
+
+struct rte_device *
+rte_bus_find_device(rte_dev_cmp_t cmp, const void *data,
+   const struct rte_device *start)
+{
+   struct rte_bus *bus;
+   struct rte_device *dev = NULL;
+   int started = start == NULL;
+
+   TAILQ_FOREACH(bus, &rte_bus_list, next) {
+   if (!bus->find_device)
+   continue;
+   if (!started) {
+   dev = bus->find_device(cmp_rte_device, start);
+   if (dev)
+   started = 1;
+   continue;
+   }
+   dev = bus->find_device(cmp, data);
+   if (dev)
+   break;
+   }
+   return dev;
+}
diff --git a/lib/librte_eal/common/include/rte_bus.h 
b/lib/librte_eal/common/include/rte_bus.h
index 5441af9..3e83227 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -203,6 +203,32 @@ struct rte_bus *rte_bus_find(rte_bus_cmp_t cmp,
 const struct rte_bus *start);
 
 /**
+ * Bus iterator to find a particular device.
+ *
+ * This function searches each registered bus to find a device that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero this function will stop iterating
+ * over any more buses and devices. To continue a search the device of
+ * a previous search can be passed via the start parameter.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to pass to comparison function.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @return
+ * A pointer to an rte_device structure or NULL in case no device matches.
+ */
+struct rte_device *rte_bus_find_device(rte_dev_cmp_t cmp,
+  const void *data,
+  const struct rte_device *start);
+
+/**
  * Find the registered bus for a particular device.
  */
 struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev);
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map 
b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 6f77222..e0a056d 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -168,6 +168,7 @@ DPDK_17.05 {
 
rte_bus_find;
rte_bus_find_by_device;
+   rte_bus_find_device;
rte_cpu_is_supported;
rte_intr_free_epoll_fd;
rte_log_dump;
-- 
2.1.4



[dpdk-dev] [PATCH v5 06/12] vdev: implement find_device bus operation

2017-06-25 Thread Gaetan Rivet
From: Jan Blunck 

Signed-off-by: Jan Blunck 
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/eal_common_vdev.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_vdev.c 
b/lib/librte_eal/common/eal_common_vdev.c
index 0037a64..52528ef 100644
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -338,9 +338,22 @@ vdev_probe(void)
return 0;
 }
 
+static struct rte_device *
+vdev_find_device(rte_dev_cmp_t cmp, const void *data)
+{
+   struct rte_vdev_device *dev;
+
+   TAILQ_FOREACH(dev, &vdev_device_list, next) {
+   if (cmp(&dev->device, data) == 0)
+   return &dev->device;
+   }
+   return NULL;
+}
+
 static struct rte_bus rte_vdev_bus = {
.scan = vdev_scan,
.probe = vdev_probe,
+   .find_device = vdev_find_device,
 };
 
 RTE_INIT(rte_vdev_bus_register);
-- 
2.1.4



[dpdk-dev] [PATCH v5 07/12] vdev: implement hotplug functionality

2017-06-25 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/eal_common_vdev.c | 36 +
 1 file changed, 36 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_vdev.c 
b/lib/librte_eal/common/eal_common_vdev.c
index 52528ef..22e4640 100644
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -38,11 +38,13 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 
 /** Double linked list of virtual device drivers. */
 TAILQ_HEAD(vdev_device_list, rte_vdev_device);
@@ -350,10 +352,44 @@ vdev_find_device(rte_dev_cmp_t cmp, const void *data)
return NULL;
 }
 
+static struct rte_device *
+vdev_plug(struct rte_devargs *da)
+{
+   struct rte_vdev_device *dev;
+   int ret;
+
+   ret = rte_vdev_init(da->virt.drv_name, da->args);
+   if (ret) {
+   rte_errno = -ret;
+   return NULL;
+   }
+   dev = find_vdev(da->virt.drv_name);
+   return &dev->device;
+}
+
+static int
+vdev_unplug(struct rte_device *dev)
+{
+   struct rte_devargs *da;
+   int ret;
+
+   da = dev->devargs;
+   if (da == NULL) {
+   rte_errno = EINVAL;
+   return -1;
+   }
+   ret = rte_vdev_uninit(da->virt.drv_name);
+   if (ret)
+   rte_errno = -ret;
+   return ret;
+}
+
 static struct rte_bus rte_vdev_bus = {
.scan = vdev_scan,
.probe = vdev_probe,
.find_device = vdev_find_device,
+   .plug = vdev_plug,
+   .unplug = vdev_unplug,
 };
 
 RTE_INIT(rte_vdev_bus_register);
-- 
2.1.4



[dpdk-dev] [PATCH v5 05/12] bus: introduce hotplug functionality

2017-06-25 Thread Gaetan Rivet
From: Jan Blunck 

Signed-off-by: Jan Blunck 
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/eal_common_bus.c  |  2 ++
 lib/librte_eal/common/include/rte_bus.h | 31 +++
 2 files changed, 33 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_bus.c 
b/lib/librte_eal/common/eal_common_bus.c
index 63fd9f1..0035da0 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -50,6 +50,8 @@ rte_bus_register(struct rte_bus *bus)
/* A bus should mandatorily have the scan implemented */
RTE_VERIFY(bus->scan);
RTE_VERIFY(bus->probe);
+   /* Buses supporting hotplug also require unplug. */
+   RTE_VERIFY(!bus->plug || bus->unplug);
 
TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
diff --git a/lib/librte_eal/common/include/rte_bus.h 
b/lib/librte_eal/common/include/rte_bus.h
index 3e83227..187c37e 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -100,6 +100,35 @@ typedef struct rte_device * 
(*rte_bus_find_device_t)(rte_dev_cmp_t cmp,
 const void *data);
 
 /**
+ * Implementation specific probe function which is responsible for linking
+ * devices on that bus with applicable drivers.
+ * The plugged device might already have been used previously by the bus,
+ * in which case some buses might prefer to detect and re-use the relevant
+ * information pertaining to this device.
+ *
+ * @param da
+ * Device declaration.
+ *
+ * @return
+ * The pointer to a valid rte_device usable by the bus on success.
+ * NULL on error. rte_errno is then set.
+ */
+typedef struct rte_device * (*rte_bus_plug_t)(struct rte_devargs *da);
+
+/**
+ * Implementation specific remove function which is responsible for unlinking
+ * devices on that bus from assigned driver.
+ *
+ * @param dev
+ * Device pointer that was returned by a previous device plug call.
+ *
+ * @return
+ * 0 on success.
+ * !0 on error. rte_errno is then set.
+ */
+typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
+
+/**
  * A structure describing a generic bus.
  */
 struct rte_bus {
@@ -108,6 +137,8 @@ struct rte_bus {
rte_bus_scan_t scan; /**< Scan for devices attached to bus */
rte_bus_probe_t probe;   /**< Probe devices on bus */
rte_bus_find_device_t find_device; /**< Find a device on bus */
+   rte_bus_plug_t plug; /**< Probe single device for drivers */
+   rte_bus_unplug_t unplug; /**< Remove single device from driver */
 };
 
 /**
-- 
2.1.4



[dpdk-dev] [PATCH v5 08/12] vdev: expose bus name

2017-06-25 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/include/rte_vdev.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_vdev.h 
b/lib/librte_eal/common/include/rte_vdev.h
index e6b678e..2d02c68 100644
--- a/lib/librte_eal/common/include/rte_vdev.h
+++ b/lib/librte_eal/common/include/rte_vdev.h
@@ -41,6 +41,8 @@ extern "C" {
 #include 
 #include 
 
+#define VIRTUAL_BUS_NAME "virtual"
+
 struct rte_vdev_device {
TAILQ_ENTRY(rte_vdev_device) next;  /**< Next attached vdev */
struct rte_device device;   /**< Inherit core device */
-- 
2.1.4



[dpdk-dev] [PATCH v5 09/12] vdev: use standard bus registration function

2017-06-25 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/eal_common_vdev.c | 18 +-
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_vdev.c 
b/lib/librte_eal/common/eal_common_vdev.c
index 22e4640..a654709 100644
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -54,14 +54,10 @@ static struct vdev_device_list vdev_device_list =
 struct vdev_driver_list vdev_driver_list =
TAILQ_HEAD_INITIALIZER(vdev_driver_list);
 
-static void rte_vdev_bus_register(void);
-
 /* register a driver */
 void
 rte_vdev_register(struct rte_vdev_driver *driver)
 {
-   rte_vdev_bus_register();
-
TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next);
 }
 
@@ -392,16 +388,4 @@ static struct rte_bus rte_vdev_bus = {
.unplug = vdev_unplug,
 };
 
-RTE_INIT(rte_vdev_bus_register);
-
-static void rte_vdev_bus_register(void)
-{
-   static int registered;
-
-   if (registered)
-   return;
-
-   registered = 1;
-   rte_vdev_bus.name = RTE_STR(virtual);
-   rte_bus_register(&rte_vdev_bus);
-}
+RTE_REGISTER_BUS(VIRTUAL_BUS_NAME, rte_vdev_bus);
-- 
2.1.4



[dpdk-dev] [PATCH v5 10/12] pci: implement find_device bus operation

2017-06-25 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/eal_common_pci.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index 78b097e..00d48d9 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -488,10 +488,23 @@ rte_pci_remove_device(struct rte_pci_device *pci_dev)
TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next);
 }
 
+static struct rte_device *
+pci_find_device(rte_dev_cmp_t cmp, const void *data)
+{
+   struct rte_pci_device *dev;
+
+   FOREACH_DEVICE_ON_PCIBUS(dev) {
+   if (cmp(&dev->device, data) == 0)
+   return &dev->device;
+   }
+   return NULL;
+}
+
 struct rte_pci_bus rte_pci_bus = {
.bus = {
.scan = rte_pci_scan,
.probe = rte_pci_probe,
+   .find_device = pci_find_device,
},
.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
-- 
2.1.4



[dpdk-dev] [PATCH v5 12/12] eal: make virtual driver probe and remove take rte_vdev_device

2017-06-25 Thread Gaetan Rivet
From: Jan Blunck 

This is a preparation to embed the generic rte_device into the rte_eth_dev
also for virtual devices.

Signed-off-by: Jan Blunck 
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/eal_common_dev.c | 93 ++
 1 file changed, 71 insertions(+), 22 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c 
b/lib/librte_eal/common/eal_common_dev.c
index a400ddd..d83ae41 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -45,50 +46,98 @@
 
 #include "eal_private.h"
 
+static int cmp_detached_dev_name(const struct rte_device *dev,
+   const void *_name)
+{
+   const char *name = _name;
+
+   /* skip attached devices */
+   if (dev->driver)
+   return 0;
+
+   return strcmp(dev->name, name);
+}
+
 int rte_eal_dev_attach(const char *name, const char *devargs)
 {
-   struct rte_pci_addr addr;
+   struct rte_device *dev;
+   int ret;
 
if (name == NULL || devargs == NULL) {
RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
return -EINVAL;
}
 
-   if (eal_parse_pci_DomBDF(name, &addr) == 0) {
-   if (rte_pci_probe_one(&addr) < 0)
-   goto err;
+   dev = rte_bus_find_device(cmp_detached_dev_name, name, NULL);
+   if (dev) {
+   struct rte_bus *bus;
+
+   bus = rte_bus_find_by_device(dev);
+   if (!bus) {
+   RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
+   name);
+   return -EINVAL;
+   }
 
-   } else {
-   if (rte_vdev_init(name, devargs))
-   goto err;
+   if (!bus->plug) {
+   RTE_LOG(ERR, EAL, "Bus function not supported\n");
+   return -ENOTSUP;
+   }
+
+   ret = (bus->plug(dev->devargs) == NULL);
+   goto out;
}
 
-   return 0;
+   /*
+* If we haven't found a bus device the user meant to "hotplug" a
+* virtual device instead.
+*/
+   ret = rte_vdev_init(name, devargs);
+out:
+   if (ret)
+   RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
+   name);
+   return ret;
+}
+
+static int cmp_dev_name(const struct rte_device *dev, const void *_name)
+{
+   const char *name = _name;
 
-err:
-   RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", name);
-   return -EINVAL;
+   return strcmp(dev->name, name);
 }
 
 int rte_eal_dev_detach(const char *name)
 {
-   struct rte_pci_addr addr;
+   struct rte_device *dev;
+   struct rte_bus *bus;
+   int ret;
 
if (name == NULL) {
RTE_LOG(ERR, EAL, "Invalid device provided.\n");
return -EINVAL;
}
 
-   if (eal_parse_pci_DomBDF(name, &addr) == 0) {
-   if (rte_pci_detach(&addr) < 0)
-   goto err;
-   } else {
-   if (rte_vdev_uninit(name))
-   goto err;
+   dev = rte_bus_find_device(cmp_dev_name, name, NULL);
+   if (!dev) {
+   RTE_LOG(ERR, EAL, "Cannot find device (%s)\n", name);
+   return -EINVAL;
+   }
+
+   bus = rte_bus_find_by_device(dev);
+   if (!bus) {
+   RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n", name);
+   return -EINVAL;
+   }
+
+   if (!bus->unplug) {
+   RTE_LOG(ERR, EAL, "Bus function not supported\n");
+   return -ENOTSUP;
}
-   return 0;
 
-err:
-   RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name);
-   return -EINVAL;
+   ret = bus->unplug(dev);
+   if (ret)
+   RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
+   name);
+   return ret;
 }
-- 
2.1.4



[dpdk-dev] [PATCH v5 11/12] pci: implement hotplug bus operation

2017-06-25 Thread Gaetan Rivet
Signed-off-by: Gaetan Rivet 
---
 lib/librte_eal/common/eal_common_pci.c | 44 ++
 1 file changed, 44 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index 00d48d9..286357d 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -47,6 +47,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -500,11 +501,54 @@ pci_find_device(rte_dev_cmp_t cmp, const void *data)
return NULL;
 }
 
+static struct rte_device *
+pci_plug(struct rte_devargs *da)
+{
+   struct rte_pci_device *pdev;
+   struct rte_pci_addr *addr;
+
+   addr = &da->pci.addr;
+   /*
+* Update eventual pci device in global list.
+* Insert it if none was found.
+*/
+   if (pci_update_device(addr) < 0) {
+   rte_errno = EIO;
+   return NULL;
+   }
+   /* Find the current device holding this address in the bus. */
+   FOREACH_DEVICE_ON_PCIBUS(pdev) {
+   if (rte_eal_compare_pci_addr(&pdev->addr, addr) == 0) {
+   if (rte_pci_probe_one(addr)) {
+   rte_errno = ENODEV;
+   return NULL;
+   }
+   break;
+   }
+   }
+   return pdev ? &pdev->device : NULL;
+}
+
+static int
+pci_unplug(struct rte_device *dev)
+{
+   struct rte_pci_device *pdev;
+
+   pdev = RTE_DEV_TO_PCI(dev);
+   if (rte_pci_detach(&pdev->addr)) {
+   rte_errno = ENODEV;
+   return -1;
+   }
+   return 0;
+}
+
 struct rte_pci_bus rte_pci_bus = {
.bus = {
.scan = rte_pci_scan,
.probe = rte_pci_probe,
.find_device = pci_find_device,
+   .plug = pci_plug,
+   .unplug = pci_unplug,
},
.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
-- 
2.1.4



Re: [dpdk-dev] [PATCH v2 00/11] bus: attach / detach API

2017-06-25 Thread Gaëtan Rivet
Hi Stephen,

On Wed, May 31, 2017 at 08:34:26AM -0700, Stephen Hemminger wrote:
> On Wed, 31 May 2017 15:17:45 +0200
> Gaetan Rivet  wrote:
> 
> > Following the work from Jan:
> > 
> > This patchset introduces the attach / detach API to rte_bus.
> > The rte_device structure is used as the generic device representation.
> > 
> > This API is implemented for the virtual bus.
> > The functions rte_eal_dev_attach and rte_eal_dev_detach are updated to
> > use this new interface.
> > 
> > --
> > 
> > 0. API rework
> > -
> > 
> > I would like to propose an evolution on the API developed by Jan.
> > 
> > The attach / detach rte_bus API is necessary to support the attach/detach
> > rte_dev API. Those are two different levels for one similar functionality.
> > 
> > Attach / detach does not allow true hotplugging, because the attach
> > function expects the devices operated upon to already exist within the
> > > buses / sub-layers. This means that this API expects device metadata
> > (bus-internal device representation and associated device information
> > read from the system) to be present upon attach. This part of the work
> > is done during scanning.
> > 
> > While it is best to avoid changing the public rte_dev API as it already
> > > exists, nothing prevents this new rte_bus API from superseding it.
> > It has been said during the previous release cycle that device hotplug
> > was a feature that interested users. True hotplug is not allowed by the
> > current attach / detach API. Worse, this API hinders the effort to bring
> > this new functionality by squatting its semantic field.
> > 
> > Thus, I propose to rename rte_bus attach / detach; plug / unplug. As it
> > is a superset of the attach / detach functionality, it can be used to
> > implement rte_dev attach / detach. Now is the right time to pivot to
> > this new feature.
> > 
> > This should help maintainers understanding the aim of this API and the
> > differences with the APIs higher-up, clarify the field and allow a new
> > functionality to be proposed.
> > 
> > The vdev bus is inherently supporting the new API, however it has been
> > made explicit. My implementation in the PCI bus in further patchset also
> > follows the rte_bus hotplug API instead of only attach / detach.
> > 
> > One remaining problem with the vdev bus is the rte_dev attach
> > implementation, which needs the rte_devargs rework to be properly fixed.
> > 
> > 1. Additional evolutions in the patchset
> > 
> > 
> > The RTE_VERIFY on the find_device is too stringent I think and forces
> > all buses to implement a public device iterator. While it could be
> > argued that it would push for quicker support for the functionality, I
> > think it's possible that some buses are not interested at all in it and
> > should simply be ignored.
> > 
> > The bus devices iterator has been fixed.
> > 
> > The internal rte_device handle was not properly setup within the
> > net_ring PMD.
> > 
> > Gaetan Rivet (2):
> >   vdev: implement hotplug functionality
> >   net/ring: fix dev handle in eth_dev
> > 
> > Jan Blunck (9):
> >   bus: add bus iterator to find a particular bus
> >   bus: add device iterator
> >   bus: add helper to find bus for a particular device
> >   bus: add bus helper iterator to find a particular device
> >   bus: introduce hotplug functionality
> >   vdev: implement find_device bus operation
> >   vdev: implement unplug bus operation
> >   eal: make virtual driver probe and remove take rte_vdev_device
> >   ethdev: Use embedded rte_device to detach driver
> > 
> >  drivers/net/ring/rte_eth_ring.c |   7 ++
> >  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   4 +
> >  lib/librte_eal/common/eal_common_bus.c  |  65 +++
> >  lib/librte_eal/common/eal_common_dev.c  | 100 
> > ++--
> >  lib/librte_eal/common/eal_common_vdev.c |  27 +++
> >  lib/librte_eal/common/include/rte_bus.h |  87 +
> >  lib/librte_eal/common/include/rte_dev.h |  26 ++
> >  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +
> >  lib/librte_ether/rte_ethdev.c   |   3 +-
> >  9 files changed, 299 insertions(+), 23 deletions(-)
> > 
> 
> LGTM
> 
> Maybe we should evolve it by having both rte_bus and  rte_dev API for one 
> release and mark
> the rte_dev API for attach/detach as deprecated?

Sorry for the late response.

I think that if the hotplug API is correctly designed, the rte_dev API
could indeed be deprecated and then removed. I do not see a reason right
now to keep it.

-- 
Gaëtan Rivet
6WIND


Re: [dpdk-dev] [PATCH v6 0/3] Support TCP/IPv4 GRO in DPDK

2017-06-25 Thread Jiayu Hu
Hi Jianfeng,

On Mon, Jun 26, 2017 at 12:03:33AM +0800, Tan, Jianfeng wrote:
> 
> 
> On 6/23/2017 10:43 PM, Jiayu Hu wrote:
> > Generic Receive Offload (GRO) is a widely used SW-based offloading
> > technique to reduce per-packet processing overhead. It gains performance
> > by reassembling small packets into large ones. Therefore, we propose to
> > support GRO in DPDK.
> > 
> > To enable more flexibility to applications, DPDK GRO is implemented as
> > a user library. Applications explicitly use the GRO library to merge
> > small packets into large ones. DPDK GRO provides two reassembly modes:
> > lightweight mode and heavyweight mode. If applications want to merge
> > packets in a simple way, they can select the lightweight mode API. If
> > applications need more fine-grained control, they can select the heavyweight
> > mode API.
> > 
> > This patchset is to support TCP/IPv4 GRO in DPDK. The first patch is to
> > provide a GRO API framework. The second patch is to support TCP/IPv4 GRO.
> > The last patch is to enable TCP/IPv4 GRO in testpmd.
> > 
> > We perform many iperf tests to see the performance gains from DPDK GRO.
> > 
> > The test environment is:
> > a. two 25Gbps physical ports (p0 and p1) are linked together. Assign p0
> > to one networking namespace and assign p1 to DPDK;
> > b. enable TSO for p0. Run iperf client on p0;
> > c. launch testpmd with p1 and a vhost-user port, and run it in csum
> > forwarding mode. Select TCP HW checksum calculation for the
> > vhost-user port in csum forwarding engine. And for better
> > performance, we select IPv4 and TCP HW checksum calculation for p1
> > too;
> > d. launch a VM with one CPU core and a virtio-net port. The VM OS is
> > ubuntu 16.04 whose virtio-net driver supports GRO. Enables RX csum
> > offloading and mrg_rxbuf for the VM. Iperf server runs in the VM;
> > e. to run iperf tests, we need to prevent the csum forwarding engine
> > from compulsorily changing packet MAC addresses. So in our tests, we
> > comment this code out (line701 ~ line704 in csumonly.c).
> > 
> > In each test, we run iperf with the following three configurations:
> > - single flow and single TCP stream
> > - multiple flows and single TCP stream
> > - single flow and parallel TCP streams
> 
> To  me, flow == TCP stream; so could you explain what does flow mean?

Sorry, I used inappropriate terms. 'flow' means a TCP connection here. And
'multiple TCP streams' means parallel iperf-client threads.

Thanks,
Jiayu

> 
> > 
> > We run the above iperf tests in three scenarios:
> > s1: disabling kernel GRO and enabling DPDK GRO
> > s2: disabling kernel GRO and disabling DPDK GRO
> > s3: enabling kernel GRO and disabling DPDK GRO
> > Comparing the throughput of s1 with s2, we can see the performance gains
> > from DPDK GRO. Comparing the throughput of s1 and s3, we can compare DPDK
> > GRO performance with kernel GRO performance.
> > 
> > Test results:
> > - DPDK GRO throughput is almost 2 times the throughput of no
> > DPDK GRO and no kernel GRO;
> > - DPDK GRO throughput is almost 1.2 times the throughput of
> > kernel GRO.
> > 
> > Change log
> > ==
> > v6:
> > - avoid checksum validation and calculation
> > - enable to process IP fragmented packets
> > - add a command in testpmd
> > - update documents
> > - modify rte_gro_timeout_flush and rte_gro_reassemble_burst
> > - rename variable names
> > v5:
> > - fix some bugs
> > - fix coding style issues
> > v4:
> > - implement DPDK GRO as an application-used library
> > - introduce lightweight and heavyweight working modes to enable
> > fine-grained controls to applications
> > - replace cuckoo hash tables with simpler table structure
> > v3:
> > - fix compilation issues.
> > v2:
> > - provide generic reassembly function;
> > - implement GRO as a device ability:
> > add APIs for devices to support GRO;
> > add APIs for applications to enable/disable GRO;
> > - update testpmd example.
> > 
> > Jiayu Hu (3):
> >lib: add Generic Receive Offload API framework
> >lib/gro: add TCP/IPv4 GRO support
> >app/testpmd: enable TCP/IPv4 GRO
> > 
> >   app/test-pmd/cmdline.c  | 125 +
> >   app/test-pmd/config.c   |  37 +++
> >   app/test-pmd/csumonly.c |   5 +
> >   app/test-pmd/testpmd.c  |   3 +
> >   app/test-pmd/testpmd.h  |  11 +
> >   config/common_base  |   5 +
> >   doc/guides/rel_notes/release_17_08.rst  |   7 +
> >   doc/guides/testpmd_app_ug/testpmd_funcs.rst |  34 +++
> >   lib/Makefile|   2 +
> >   lib/librte_gro/Makefile |  51 
> >   lib/librte_gro/rte_gro.c| 221 
> >   lib/librte_gro/rte_gro.h| 195 ++
> >   lib/librte_gro/rte_gro_tcp.c| 393 
> > +

Re: [dpdk-dev] [PATCH v6 2/3] lib/gro: add TCP/IPv4 GRO support

2017-06-25 Thread Jiayu Hu
Hi Jianfeng,

On Mon, Jun 26, 2017 at 12:53:31AM +0800, Tan, Jianfeng wrote:
> Hi Jiayu,
> 
> 
> On 6/23/2017 10:43 PM, Jiayu Hu wrote:
> > In this patch, we introduce five APIs to support TCP/IPv4 GRO.
> > - gro_tcp_tbl_create: create a TCP reassembly table, which is used to
> >  merge packets.
> > - gro_tcp_tbl_destroy: free memory space of a TCP reassembly table.
> > - gro_tcp_tbl_flush: flush all packets from a TCP reassembly table.
> > - gro_tcp_tbl_timeout_flush: flush timeout packets from a TCP
> >  reassembly table.
> > - gro_tcp4_reassemble: reassemble an inputted TCP/IPv4 packet.
> > 
> > TCP/IPv4 GRO API assumes all inputted packets are with correct IPv4
> > and TCP checksums. And TCP/IPv4 GRO API doesn't update IPv4 and TCP
> > checksums for merged packets. If inputted packets are IP fragmented,
> > TCP/IPv4 GRO API assumes they are complete packets (i.e. with L4
> > headers).
> > 
> > In TCP GRO, we use a table structure, called TCP reassembly table, to
> > reassemble packets. Both TCP/IPv4 and TCP/IPv6 GRO use the same table
> > > structure. A TCP reassembly table includes a key array and an item array,
> > where the key array keeps the criteria to merge packets and the item
> > array keeps packet information.
> > 
> > One key in the key array points to an item group, which consists of
> > packets which have the same criteria value. If two packets are able to
> > merge, they must be in the same item group. Each key in the key array
> > includes two parts:
> > - criteria: the criteria of merging packets. If two packets can be
> >  merged, they must have the same criteria value.
> > - start_index: the index of the first incoming packet of the item group.
> > 
> > Each element in the item array keeps the information of one packet. It
> > mainly includes two parts:
> > - pkt: packet address
> > - next_pkt_index: the index of the next packet in the same item group.
> >  All packets in the same item group are chained by next_pkt_index.
> >  With next_pkt_index, we can locate all packets in the same item
> >  group one by one.
> > 
> > To process an incoming packet needs three steps:
> > a. check if the packet should be processed. Packets with the following
> >  properties won't be processed:
> > - packets without data (e.g. SYN, SYN-ACK)
> > b. traverse the key array to find a key which has the same criteria
> >  value with the incoming packet. If find, goto step c. Otherwise,
> >  insert a new key and insert the packet into the item array.
> > c. locate the first packet in the item group via the start_index in the
> >  key. Then traverse all packets in the item group via next_pkt_index.
> >  If find one packet which can merge with the incoming one, merge them
> >  together. If can't find, insert the packet into this item group.
> > 
> > Signed-off-by: Jiayu Hu 
> > ---
> >   doc/guides/rel_notes/release_17_08.rst |   7 +
> >   lib/librte_gro/Makefile|   1 +
> >   lib/librte_gro/rte_gro.c   | 126 +--
> >   lib/librte_gro/rte_gro.h   |   6 +-
> >   lib/librte_gro/rte_gro_tcp.c   | 393 
> > +
> >   lib/librte_gro/rte_gro_tcp.h   | 188 
> >   6 files changed, 705 insertions(+), 16 deletions(-)
> >   create mode 100644 lib/librte_gro/rte_gro_tcp.c
> >   create mode 100644 lib/librte_gro/rte_gro_tcp.h
> > 
> > diff --git a/doc/guides/rel_notes/release_17_08.rst 
> > b/doc/guides/rel_notes/release_17_08.rst
> > index 842f46f..f067247 100644
> > --- a/doc/guides/rel_notes/release_17_08.rst
> > +++ b/doc/guides/rel_notes/release_17_08.rst
> > @@ -75,6 +75,13 @@ New Features
> > Added support for firmwares with multiple Ethernet ports per physical 
> > port.
> > +* **Add Generic Receive Offload API support.**
> > +
> > +  Generic Receive Offload (GRO) API supports to reassemble TCP/IPv4
> > +  packets. GRO API assumes all inputted packets are with correct
> > +  checksums. GRO API doesn't update checksums for merged packets. If
> > +  inputted packets are IP fragmented, GRO API assumes they are complete
> > +  packets (i.e. with L4 headers).
> >   Resolved Issues
> >   ---
> > diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
> > index 7e0f128..e89344d 100644
> > --- a/lib/librte_gro/Makefile
> > +++ b/lib/librte_gro/Makefile
> > @@ -43,6 +43,7 @@ LIBABIVER := 1
> >   # source files
> >   SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
> > +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro_tcp.c
> >   # install this header file
> >   SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
> > diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
> > index ebc545f..ae800f9 100644
> > --- a/lib/librte_gro/rte_gro.c
> > +++ b/lib/librte_gro/rte_gro.c
> > @@ -32,11 +32,15 @@
> >   #include 
> >   #include 
> > +#include 
> >   #include "rte_gro.h"
> > +#include "rte_gro_tcp.h"
> > -static gro_tbl_create_fn tbl_create_functions[GR

Re: [dpdk-dev] [PATCH 2/3] eal: PCI domain should be 32 bits

2017-06-25 Thread Chang, Cunyin


> -Original Message-
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Saturday, June 24, 2017 1:47 AM
> To: Chang, Cunyin 
> Cc: dev@dpdk.org; Stephen Hemminger 
> Subject: Re: [dpdk-dev] [PATCH 2/3] eal: PCI domain should be 32 bits
> 
> On Fri, 23 Jun 2017 00:41:43 +
> "Chang, Cunyin"  wrote:
> 
> > > -Original Message-
> > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > Sent: Thursday, June 22, 2017 11:52 PM
> > > To: Chang, Cunyin 
> > > Cc: dev@dpdk.org; Stephen Hemminger 
> > > Subject: Re: [dpdk-dev] [PATCH 2/3] eal: PCI domain should be 32
> > > bits
> > >
> > > On Thu, 22 Jun 2017 09:28:31 +
> > > "Chang, Cunyin"  wrote:
> > >
> > > > > I think the patch series does not cover all areas which need to
> > > > > adapt to a u32 PCI domain. We still need some other work to do:
> > > > we need define another macro such as PCI_PRI_FMT. Something like:
> > > > #define PCI_XXX_PRI_FMT "%.5" PRIx32 ":%.2" PRIx8 ":%.2" PRIx8 ".%"
> > > > PRIx8
> > > >
> > > > PCI_PRI_STR_SIZE also need to be modified:
> > > > #define PCI_PRI_STR_SIZE sizeof("X:XX:XX.X")
> > > >
> > > > > The macro PCI_PRI_FMT will not work if the domain exceeds 16 bits.
> > > > It will impact the following functions:
> > > > 1  RTE_LOG function, there a lots of RTE_LOG such as:
> > > > RTE_LOG(WARNING, EAL,
> > > > "Requested device " PCI_PRI_FMT " cannot be
> > > used\n",
> > > > addr->domain, addr->bus, addr->devid, addr-
> function);
> > > > 2  pci_dump_one_device().
> > > > 3 rte_eal_pci_device_name()
> > > > 4 pci_update_device()
> > > > 5 pci_ioport_map()
> > > > 6 pci_get_uio_dev()
> > > > 7 pci_uio_map_resource_by_index()
> > > > 8 pci_uio_ioport_map()
> > > > 9 pci_vfio_map_resource()
> > > > 10 pci_vfio_unmap_resource()
> > > > All the above functions will related with the macro PCI_PRI_FMT,
> > > >so I think
> > > they need to be modified too.
> > > >
> > > > There are some other code need modify:
> > > > In function rte_eal_compare_pci_addr(), we need do the following
> work:
> > > > dev_addr = ((uint64_t)addr->domain << 24) | ((uint64_t)addr->bus <<
> 16) |
> > > > ((uint64_t)addr->devid << 8) |
> > > (uint64_t)addr->function;
> > > > dev_addr2 = ((uint64_t)addr2->domain << 24) |
> > > > ((uint64_t)addr2->bus <<
> > > 16) |
> > > > ((uint64_t)addr2->devid << 8) |
> > > (uint64_t)addr2->function;
> > > >
> > > > In function eal_parse_pci_BDF(), we need do the following work:
> > > > GET_PCIADDR_FIELD(input, dev_addr->domain, UINT32_MAX, ':');
> > >
> > > Good catch, the string size must be increased.
> > >
> > > It turns out that you don't need to change the PCI print format.
> > > Printing the domain with %.4x works correctly with 32 bit. It just
> > > gets wider. This is how pciutils works, so no change is necessary there.
> >
> > I suppose we should use %4x, not %.4x?, the %.4x will cut the
> 10000:05:00.0 as 0000:05:00.0.
> > So the macro:
> > #define PCI_PRI_FMT "%.4" PRIx32 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
> > Should be:
> > #define PCI_PRI_FMT "%4" PRIx32 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
> >
> > Make sense?
> 
> No, that format would not be correct. I want to keep the visible output the
> same for the normal case of 16 bit domains.  Output of printf test program
> shows that %.4x is the correct format to use.
> 
> Domain       %4x        %.4x       %4.4x
> 0              0        0000        0000
> 0x1            1        0001        0001
> 0x1000      1000        1000        1000
> 0x10000    10000       10000       10000
> 0x12345678   12345678   12345678   12345678
> 0xdeadbeef   deadbeef   deadbeef   deadbeef
> 
Looks good. No more questions about this.


Re: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated mode parameter

2017-06-25 Thread Vasily Philipov


> -Original Message-
> From: Wu, Jingjing [mailto:jingjing...@intel.com]
> Sent: Thursday, June 22, 2017 04:13
> To: Vasily Philipov ; dev@dpdk.org
> Cc: Adrien Mazarguil ; Nélio Laranjeiro
>   
> Subject: RE: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated mode
> parameter
> 
> 
> 
> > -Original Message-
> > From: Vasily Philipov [mailto:vasi...@mellanox.com]
> > Sent: Wednesday, June 21, 2017 5:44 PM
> > To: Wu, Jingjing ; dev@dpdk.org
> > Cc: Adrien Mazarguil ; Nélio Laranjeiro
> > 
> > Subject: RE: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated mode
> > parameter
> >
> >
> >
> > > -Original Message-
> > > From: Wu, Jingjing [mailto:jingjing...@intel.com]
> > > Sent: Tuesday, June 20, 2017 04:27
> > > To: Vasily Philipov ; dev@dpdk.org
> > > Cc: Adrien Mazarguil ; Nélio Laranjeiro
> > > 
> > > Subject: RE: [dpdk-dev] [PATCH v4 4/4] app/testpmd: add isolated
> > > mode parameter
> > >
> > >
> > > > +/*
> > > >   * Avoids to check link status when starting/stopping a port.
> > > >   */
> > > >  uint8_t no_link_check = 0; /* check by default */ @@ -1422,6
> > > > +1427,15 @@ static void eth_event_callback(uint8_t port_id,
> > > > if (port->need_reconfig > 0) {
> > > > port->need_reconfig = 0;
> > > >
> > > > +   if (isolated_mode) {
> > > > +   int ret = port_flow_isolate(pi, 1);
> > > > +   if (ret) {
> > > > +   printf("Failed to apply 
> > > > isolated"
> > > > +  " mode on port %d\n", 
> > > > pi);
> > > > +   return -1;
> > > > +   }
> > > > +   }
> > > > +
> > > Should it block the app startup if isolated-mode setting fails?
> >
> > if isolated mode cannot be enabled on any port, that port cannot be
> > initialized and that causes testpmd to quit, at least it won't go
> > against the user's wishes
> 
> If so, I prefer the isolated_mode to be port's argument but not global one.

Could you please provide some example of such argument - I will take it as a 
reference...


> How about to add a command to configure the isolate mode?

This already exists - just type "flow isolate {port_id} {boolean}" at the
testpmd prompt.
Please see at:

http://dpdk.org/ml/archives/dev/2017-April/064327.html
http://dpdk.org/dev/patchwork/patch/23741/







[dpdk-dev] [PATCH v7 1/3] lib: add Generic Receive Offload API framework

2017-06-25 Thread Jiayu Hu
Generic Receive Offload (GRO) is a widely used SW-based offloading
technique to reduce per-packet processing overhead. It gains
performance by reassembling small packets into large ones. This
patchset is to support GRO in DPDK. To support GRO, this patch
implements a GRO API framework.

To enable more flexibility to applications, DPDK GRO is implemented as
a user library. Applications explicitly use the GRO library to merge
small packets into large ones. DPDK GRO provides two reassembly modes.
One is called lightweight mode, the other is called heavyweight mode.
If applications want to merge packets in a simple way and the number
of packets is relatively small, they can use the lightweight mode.
If applications need more fine-grained controls, they can choose the
heavyweight mode.

rte_gro_reassemble_burst is the main reassembly API which is used in
lightweight mode and processes N packets at a time. For applications,
performing GRO in lightweight mode is simple. They just need to invoke
rte_gro_reassemble_burst. Applications can get GROed packets as soon as
rte_gro_reassemble_burst returns.

rte_gro_reassemble is the main reassembly API which is used in
heavyweight mode and processes one packet at a time. For applications,
performing GRO in heavyweight mode is relatively complicated. Before
performing GRO, applications need to create a GRO table by
rte_gro_tbl_create. Then they can use rte_gro_reassemble to process
packets one by one. The processed packets are in the GRO table. If
applications want to get them, applications need to manually flush
them by flush APIs.

Signed-off-by: Jiayu Hu 
---
 config/common_base |   5 +
 lib/Makefile   |   2 +
 lib/librte_gro/Makefile|  50 +
 lib/librte_gro/rte_gro.c   | 125 ++
 lib/librte_gro/rte_gro.h   | 205 +
 lib/librte_gro/rte_gro_version.map |  12 +++
 mk/rte.app.mk  |   1 +
 7 files changed, 400 insertions(+)
 create mode 100644 lib/librte_gro/Makefile
 create mode 100644 lib/librte_gro/rte_gro.c
 create mode 100644 lib/librte_gro/rte_gro.h
 create mode 100644 lib/librte_gro/rte_gro_version.map

diff --git a/config/common_base b/config/common_base
index f6aafd1..167f5ef 100644
--- a/config/common_base
+++ b/config/common_base
@@ -712,6 +712,11 @@ CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
 CONFIG_RTE_LIBRTE_PMD_VHOST=n
 
 #
+# Compile GRO library
+#
+CONFIG_RTE_LIBRTE_GRO=y
+
+#
 #Compile Xen domain0 support
 #
 CONFIG_RTE_LIBRTE_XEN_DOM0=n
diff --git a/lib/Makefile b/lib/Makefile
index 07e1fd0..ac1c2f6 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -106,6 +106,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
 DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
 DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether
+DIRS-$(CONFIG_RTE_LIBRTE_GRO) += librte_gro
+DEPDIRS-librte_gro := librte_eal librte_mbuf librte_ether librte_net
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
new file mode 100644
index 000..7e0f128
--- /dev/null
+++ b/lib/librte_gro/Makefile
@@ -0,0 +1,50 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2017 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include

[dpdk-dev] [PATCH v7 0/3] Support TCP/IPv4 GRO in DPDK

2017-06-25 Thread Jiayu Hu
Generic Receive Offload (GRO) is a widely used SW-based offloading
technique to reduce per-packet processing overhead. It gains performance
by reassembling small packets into large ones. Therefore, we propose to
support GRO in DPDK.

To enable more flexibility to applications, DPDK GRO is implemented as
a user library. Applications explicitly use the GRO library to merge
small packets into large ones. DPDK GRO provides two reassembly modes:
lightweight mode and heavyweight mode. If applications want to merge
packets in a simple way, they can select the lightweight mode API. If
applications need more fine-grained controls, they can select the
heavyweight mode API.

This patchset is to support TCP/IPv4 GRO in DPDK. The first patch is to
provide a GRO API framework. The second patch is to support TCP/IPv4 GRO.
The last patch is to enable TCP/IPv4 GRO in testpmd.

We perform many iperf tests to see the performance gains from DPDK GRO.
The test environment is:
a. two 25Gbps physical ports (p0 and p1) are linked together. Assign p0
to one networking namespace and assign p1 to DPDK;
b. enable TSO for p0. Run iperf client on p0;
c. launch testpmd with p1 and a vhost-user port, and run it in csum
forwarding mode. Select TCP HW checksum calculation for the
vhost-user port in csum forwarding engine. And for better
performance, we select IPv4 and TCP HW checksum calculation for p1
too;
d. launch a VM with one CPU core and a virtio-net port. The VM OS is
ubuntu 16.04 whose virtio-net driver supports GRO. Enables RX csum
offloading and mrg_rxbuf for the VM. Iperf server runs in the VM;
e. to run iperf tests, we need to prevent the csum forwarding engine
from forcibly changing packet MAC addresses. So in our tests, we
comment this code out (line701 ~ line704 in csumonly.c).

In each test, we run iperf with the following three configurations:
- single flow and single TCP client thread 
- multiple flows and single TCP client thread
- single flow and parallel TCP client threads

We run above iperf tests on three scenarios:
s1: disabling kernel GRO and enabling DPDK GRO
s2: disabling kernel GRO and disabling DPDK GRO
s3: enabling kernel GRO and disabling DPDK GRO
Comparing the throughput of s1 with s2, we can see the performance gains
from DPDK GRO. Comparing the throughput of s1 and s3, we can compare DPDK
GRO performance with kernel GRO performance.

Test results:
- DPDK GRO throughput is almost 2 times the throughput with neither
DPDK GRO nor kernel GRO;
- DPDK GRO throughput is almost 1.2 times the throughput of
kernel GRO.

Change log
==
v7:
- add a macro 'GRO_MAX_BURST_ITEM_NUM' to avoid stack overflow in
rte_gro_reassemble_burst
- change macro name (_NB to _NUM)
- add '#ifdef __cplusplus ...' in rte_gro.h
v6:
- avoid checksum validation and calculation
- enable to process IP fragmented packets
- add a command in testpmd
- update documents
- modify rte_gro_timeout_flush and rte_gro_reassemble_burst
- rename variable name
v5:
- fix some bugs
- fix coding style issues
v4:
- implement DPDK GRO as an application-used library
- introduce lightweight and heavyweight working modes to enable
fine-grained controls to applications
- replace cuckoo hash tables with simpler table structure
v3:
- fix compilation issues.
v2:
- provide generic reassembly function;
- implement GRO as a device ability:
add APIs for devices to support GRO;
add APIs for applications to enable/disable GRO;
- update testpmd example. 

Jiayu Hu (3):
  lib: add Generic Receive Offload API framework
  lib/gro: add TCP/IPv4 GRO support
  app/testpmd: enable TCP/IPv4 GRO

 app/test-pmd/cmdline.c  | 125 +
 app/test-pmd/config.c   |  37 +++
 app/test-pmd/csumonly.c |   5 +
 app/test-pmd/testpmd.c  |   3 +
 app/test-pmd/testpmd.h  |  11 +
 config/common_base  |   5 +
 doc/guides/rel_notes/release_17_08.rst  |   7 +
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  34 +++
 lib/Makefile|   2 +
 lib/librte_gro/Makefile |  51 
 lib/librte_gro/rte_gro.c| 218 +++
 lib/librte_gro/rte_gro.h| 209 +++
 lib/librte_gro/rte_gro_tcp.c| 394 
 lib/librte_gro/rte_gro_tcp.h| 191 ++
 lib/librte_gro/rte_gro_version.map  |  12 +
 mk/rte.app.mk   |   1 +
 16 files changed, 1305 insertions(+)
 create mode 100644 lib/librte_gro/Makefile
 create mode 100644 lib/librte_gro/rte_gro.c
 create mode 100644 lib/librte_gro/rte_gro.h
 create mode 100644 lib/librte_gro/rte_gro_tcp.c
 create mode 100644 lib/librte_gro/rte_gro_tcp.h
 create mode 100644

[dpdk-dev] [PATCH v7 3/3] app/testpmd: enable TCP/IPv4 GRO

2017-06-25 Thread Jiayu Hu
This patch enables TCP/IPv4 GRO library in csum forwarding engine.
By default, GRO is turned off. Users can use command "gro (on|off)
(port_id)" to enable or disable GRO for a given port. If GRO is enabled
on a port, GRO is performed on all TCP/IPv4 packets received from that
port. Besides, users can set max flow number and packets number per-flow
by command "gro set (max_flow_num) (max_item_num_per_flow) (port_id)".

Signed-off-by: Jiayu Hu 
---
 app/test-pmd/cmdline.c  | 125 
 app/test-pmd/config.c   |  37 
 app/test-pmd/csumonly.c |   5 ++
 app/test-pmd/testpmd.c  |   3 +
 app/test-pmd/testpmd.h  |  11 +++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  34 
 6 files changed, 215 insertions(+)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index ff8ffd2..cb359e1 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -76,6 +76,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -419,6 +420,14 @@ static void cmd_help_long_parsed(void *parsed_result,
"tso show (portid)"
"Display the status of TCP Segmentation 
Offload.\n\n"
 
+   "gro (on|off) (port_id)"
+   "Enable or disable Generic Receive Offload in io"
+   " forward engine.\n\n"
+
+   "gro set (max_flow_num) (max_item_num_per_flow) 
(port_id)\n"
+   "Set max flow number and max packet number per-flow"
+   " for GRO.\n\n"
+
"set fwd (%s)\n"
"Set packet forwarding mode.\n\n"
 
@@ -3827,6 +3836,120 @@ cmdline_parse_inst_t cmd_tunnel_tso_show = {
},
 };
 
+/* *** SET GRO FOR A PORT *** */
+struct cmd_gro_result {
+   cmdline_fixed_string_t cmd_keyword;
+   cmdline_fixed_string_t mode;
+   uint8_t port_id;
+};
+
+static void
+cmd_enable_gro_parsed(void *parsed_result,
+   __attribute__((unused)) struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   struct cmd_gro_result *res;
+
+   res = parsed_result;
+   setup_gro(res->mode, res->port_id);
+}
+
+cmdline_parse_token_string_t cmd_gro_keyword =
+   TOKEN_STRING_INITIALIZER(struct cmd_gro_result,
+   cmd_keyword, "gro");
+cmdline_parse_token_string_t cmd_gro_mode =
+   TOKEN_STRING_INITIALIZER(struct cmd_gro_result,
+   mode, "on#off");
+cmdline_parse_token_num_t cmd_gro_pid =
+   TOKEN_NUM_INITIALIZER(struct cmd_gro_result,
+   port_id, UINT8);
+
+cmdline_parse_inst_t cmd_enable_gro = {
+   .f = cmd_enable_gro_parsed,
+   .data = NULL,
+   .help_str = "gro (on|off) (port_id)",
+   .tokens = {
+   (void *)&cmd_gro_keyword,
+   (void *)&cmd_gro_mode,
+   (void *)&cmd_gro_pid,
+   NULL,
+   },
+};
+
+/* *** SET MAX FLOW NUMBER AND ITEM NUM PER FLOW FOR GRO *** */
+struct cmd_gro_set_result {
+   cmdline_fixed_string_t gro;
+   cmdline_fixed_string_t mode;
+   uint16_t flow_num;
+   uint16_t item_num_per_flow;
+   uint8_t port_id;
+};
+
+static void
+cmd_gro_set_parsed(void *parsed_result,
+  __attribute__((unused)) struct cmdline *cl,
+  __attribute__((unused)) void *data)
+{
+   struct cmd_gro_set_result *res = parsed_result;
+
+   if (port_id_is_invalid(res->port_id, ENABLED_WARN))
+   return;
+   if (test_done == 0) {
+   printf("Before set GRO flow_num and item_num_per_flow,"
+   " please stop forwarding first\n");
+   return;
+   }
+
+   if (!strcmp(res->mode, "set")) {
+   if (res->flow_num == 0)
+   printf("Invalid flow number. Revert to default value:"
+   " %u.\n", GRO_DEFAULT_FLOW_NUM);
+   else
+   gro_ports[res->port_id].param.max_flow_num =
+   res->flow_num;
+
+   if (res->item_num_per_flow == 0)
+   printf("Invalid item number per-flow. Revert"
+   " to default value:%u.\n",
+   GRO_DEFAULT_ITEM_NUM_PER_FLOW);
+   else
+   gro_ports[res->port_id].param.max_item_per_flow =
+   res->item_num_per_flow;
+   }
+}
+
+cmdline_parse_token_string_t cmd_gro_set_gro =
+   TOKEN_STRING_INITIALIZER(struct cmd_gro_set_result,
+   gro, "gro");
+cmdline_parse_token_string_t cmd_gro_set_mode =
+   TOKEN_STRING_INITIALIZER(struct cmd_gro_set_result,
+   mode, "set");
+cmdline_parse_token_num_t cmd_gro

[dpdk-dev] [PATCH v7 2/3] lib/gro: add TCP/IPv4 GRO support

2017-06-25 Thread Jiayu Hu
In this patch, we introduce five APIs to support TCP/IPv4 GRO.
- gro_tcp_tbl_create: create a TCP reassembly table, which is used to
merge packets.
- gro_tcp_tbl_destroy: free memory space of a TCP reassembly table.
- gro_tcp_tbl_flush: flush all packets from a TCP reassembly table.
- gro_tcp_tbl_timeout_flush: flush timeout packets from a TCP
reassembly table.
- gro_tcp4_reassemble: reassemble an inputted TCP/IPv4 packet.

TCP/IPv4 GRO API assumes all inputted packets are with correct IPv4
and TCP checksums. And TCP/IPv4 GRO API doesn't update IPv4 and TCP
checksums for merged packets. If inputted packets are IP fragmented,
TCP/IPv4 GRO API assumes they are complete packets (i.e. with L4
headers).

In TCP GRO, we use a table structure, called TCP reassembly table, to
reassemble packets. Both TCP/IPv4 and TCP/IPv6 GRO use the same table
structure. A TCP reassembly table includes a key array and an item array,
where the key array keeps the criteria to merge packets and the item
array keeps packet information.

One key in the key array points to an item group, which consists of
packets which have the same criteria value. If two packets are able to
merge, they must be in the same item group. Each key in the key array
includes two parts:
- criteria: the criteria of merging packets. If two packets can be
merged, they must have the same criteria value.
- start_index: the index of the first incoming packet of the item group.

Each element in the item array keeps the information of one packet. It
mainly includes two parts:
- pkt: packet address
- next_pkt_index: the index of the next packet in the same item group.
All packets in the same item group are chained by next_pkt_index.
With next_pkt_index, we can locate all packets in the same item
group one by one.

Processing an incoming packet takes three steps:
a. check if the packet should be processed. Packets with the following
properties won't be processed:
- packets without data (e.g. SYN, SYN-ACK)
b. traverse the key array to find a key which has the same criteria
value as the incoming packet. If one is found, go to step c. Otherwise,
insert a new key and insert the packet into the item array.
c. locate the first packet in the item group via the start_index in the
key. Then traverse all packets in the item group via next_pkt_index.
If a packet is found which can merge with the incoming one, merge them
together. If none is found, insert the packet into this item group.

Signed-off-by: Jiayu Hu 
---
 doc/guides/rel_notes/release_17_08.rst |   7 +
 lib/librte_gro/Makefile|   1 +
 lib/librte_gro/rte_gro.c   | 123 --
 lib/librte_gro/rte_gro.h   |   6 +-
 lib/librte_gro/rte_gro_tcp.c   | 394 +
 lib/librte_gro/rte_gro_tcp.h   | 191 
 6 files changed, 706 insertions(+), 16 deletions(-)
 create mode 100644 lib/librte_gro/rte_gro_tcp.c
 create mode 100644 lib/librte_gro/rte_gro_tcp.h

diff --git a/doc/guides/rel_notes/release_17_08.rst 
b/doc/guides/rel_notes/release_17_08.rst
index 842f46f..f067247 100644
--- a/doc/guides/rel_notes/release_17_08.rst
+++ b/doc/guides/rel_notes/release_17_08.rst
@@ -75,6 +75,13 @@ New Features
 
   Added support for firmwares with multiple Ethernet ports per physical port.
 
+* **Add Generic Receive Offload API support.**
+
+  Generic Receive Offload (GRO) API supports to reassemble TCP/IPv4
+  packets. GRO API assumes all inputted packets are with correct
+  checksums. GRO API doesn't update checksums for merged packets. If
+  inputted packets are IP fragmented, GRO API assumes they are complete
+  packets (i.e. with L4 headers).
 
 Resolved Issues
 ---
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
index 7e0f128..e89344d 100644
--- a/lib/librte_gro/Makefile
+++ b/lib/librte_gro/Makefile
@@ -43,6 +43,7 @@ LIBABIVER := 1
 
 # source files
 SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro_tcp.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index 33275e8..5b89928 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -32,11 +32,15 @@
 
 #include 
 #include 
+#include 
 
 #include "rte_gro.h"
+#include "rte_gro_tcp.h"
 
-static gro_tbl_create_fn tbl_create_functions[GRO_TYPE_MAX_NUM];
-static gro_tbl_destroy_fn tbl_destroy_functions[GRO_TYPE_MAX_NUM];
+static gro_tbl_create_fn tbl_create_functions[GRO_TYPE_MAX_NUM] = {
+   gro_tcp_tbl_create, NULL};
+static gro_tbl_destroy_fn tbl_destroy_functions[GRO_TYPE_MAX_NUM] = {
+   gro_tcp_tbl_destroy, NULL};
 
 struct rte_gro_tbl *rte_gro_tbl_create(uint16_t socket_id,
uint16_t max_flow_num,
@@ -94,32 +98,121 @@ void rte_gro_tbl_destroy(struct rte_gro_tbl *gro_tbl)
 }
 
 uint16_t
-rte_gro_reassemble_burst(struct rte_mbuf **pkts __rte_unused,

[dpdk-dev] [PATCH] eal: fix wrong config file path

2017-06-25 Thread Jianfeng Tan
When primary process is booted with --file-prefix option, the API,
rte_eal_primary_proc_alive(), uses a wrong config file path to
check if primary process is alive.

Fix it by calling helper function to get config file path.

Fixes: dd3e00138d74 ("eal: check if primary process is alive")
Cc: sta...@dpdk.org

Signed-off-by: Jianfeng Tan 
---
 lib/librte_eal/common/eal_common_proc.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_proc.c 
b/lib/librte_eal/common/eal_common_proc.c
index 12e0fca..60526ca 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -46,10 +46,10 @@ rte_eal_primary_proc_alive(const char *config_file_path)
if (config_file_path)
config_fd = open(config_file_path, O_RDONLY);
else {
-   char default_path[PATH_MAX+1];
-   snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
-default_config_dir, "rte");
-   config_fd = open(default_path, O_RDONLY);
+   const char *path;
+
+   path = eal_runtime_config_path();
+   config_fd = open(path, O_RDONLY);
}
if (config_fd < 0)
return 0;
-- 
2.7.4