[dpdk-dev] [PATCH v5] devtools: rework abi checker script

2017-10-05 Thread Olivier Matz
The initial version of the script had some limitations:
- cannot work on a non-clean workspace
- environment variables are not documented
- no compilation log in case of failure
- returned success even if the ABI is incompatible

This patch addresses these issues and reworks the code.

Signed-off-by: Olivier Matz 
Acked-by: Neil Horman 
---

v4->v5:
- Add a log when shared lib has no public ABI

v3->v4:
- clarify logs on incompatible abi
- log when a command returned an error
- [really] fix the report path
- log the output of make config in the proper file

v2->v3:
- fix when not launched from dpdk root dir
- use "-Og -Wno-error" instead of "-O0"
- fix typo in commit log

v1->v2:
- use /usr/bin/env to find bash (which is required)
- fix displayed path to html reports
- reword help for -f option

 devtools/validate-abi.sh | 397 ---
 1 file changed, 205 insertions(+), 192 deletions(-)

diff --git a/devtools/validate-abi.sh b/devtools/validate-abi.sh
index 0accc99b1..8caf43e83 100755
--- a/devtools/validate-abi.sh
+++ b/devtools/validate-abi.sh
@@ -1,7 +1,8 @@
-#!/bin/sh
+#!/usr/bin/env bash
 #   BSD LICENSE
 #
 #   Copyright(c) 2015 Neil Horman. All rights reserved.
+#   Copyright(c) 2017 6WIND S.A.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -27,236 +28,248 @@
 #   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 #   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-TAG1=$1
-TAG2=$2
-TARGET=$3
-ABI_DIR=`mktemp -d -p /tmp ABI.XX`
+set -e
 
-usage() {
-   echo "$0   "
-}
+abicheck=abi-compliance-checker
+abidump=abi-dumper
+default_dst=abi-check
+default_target=x86_64-native-linuxapp-gcc
 
-log() {
-   local level=$1
-   shift
-   echo "$*"
+# trap on error
+err_report() {
+echo "$0: error at line $1"
 }
+trap 'err_report $LINENO' ERR
 
-validate_tags() {
+print_usage () {
+   cat <<- END_OF_HELP
+   $(basename $0) [options]  
 
-   if [ -z "$HASH1" ]
-   then
-   echo "invalid revision: $TAG1"
-   return
-   fi
-   if [ -z "$HASH2" ]
-   then
-   echo "invalid revision: $TAG2"
-   return
-   fi
+   This script compares the ABI of 2 git revisions of the current
+   workspace. The output is a html report and a compilation log.
+
+   The objective is to make sure that applications built against
+   DSOs from the first revision can still run when executed using
+   the DSOs built from the second revision.
+
+and  are git commit id or tags.
+
+   Options:
+ -hshow this help
+ -j   enable parallel compilation with  threads
+ -vshow compilation logs on the console
+ -d   change working directory (default is ${default_dst})
+ -tthe dpdk target to use (default is ${default_target})
+ -foverwrite existing files in destination directory
+
+   The script returns 0 on success, or the value of last failing
+   call of ${abicheck} (incompatible abi or the tool has run with errors).
+   The errors returned by ${abidump} are ignored.
+
+   END_OF_HELP
 }
 
-validate_args() {
-   if [ -z "$TAG1" ]
-   then
-   echo "Must Specify REV1"
-   return
-   fi
-   if [ -z "$TAG2" ]
-   then
-   echo "Must Specify REV2"
-   return
-   fi
-   if [ -z "$TARGET" ]
-   then
-   echo "Must Specify a build target"
+# log in the file, and on stdout if verbose
+# $1: level string
+# $2: string to be logged
+log() {
+   echo "$1: $2"
+   if [ "${verbose}" != "true" ]; then
+   echo "$1: $2" >&3
fi
 }
 
+# launch a command and log it, taking care of surrounding spaces with quotes
+cmd() {
+   local i s whitespace ret
+   s=""
+   whitespace="[[:space:]]"
+   for i in "$@"; do
+   if [[ $i =~ $whitespace ]]; then
+   i=\"$i\"
+   fi
+   if [ -z "$s" ]; then
+   s="$i"
+   else
+   s="$s $i"
+   fi
+   done
+
+   ret=0
+   log "CMD" "$s"
+   "$@" || ret=$?
+   if [ "$ret" != "0" ]; then
+   log "CMD" "previous command returned $ret"
+   fi
+
+   return $ret
+}
 
-cleanup_and_exit() {
-   rm -rf $ABI_DIR
-   git checkout $CURRENT_BRANCH
-   exit $1
+# redirect or copy stderr/stdout to a file
+# the syntax is unfamiliar, but it makes the rest of the
+# code easier to read, avoiding the use of pipes
+set_log_file() {
+   # save original stdout and stderr in fd 3 and 4
+   exec 3>&1
+   exec 4>&2
+   # create a new fd 5 that send to a file
+   exec 5> >(cat > $1)
+   # send stdout and stderr to fd 5
+   if [ "${verbose}" = "true" ]; then
+ 

Re: [dpdk-dev] [PATCH v7 4/8] ethdev: add GTP items to support flow API

2017-10-05 Thread Wu, Jingjing


> -Original Message-
> From: Xing, Beilei
> Sent: Friday, September 29, 2017 11:51 PM
> To: Wu, Jingjing 
> Cc: Chilikin, Andrey ; dev@dpdk.org
> Subject: [PATCH v7 4/8] ethdev: add GTP items to support flow API
> 
> This patch adds GTP, GTPC and GTPU items for
> generic flow API, and also exposes item fields
> through the flow command.
> 
> Signed-off-by: Beilei Xing 
> Acked-by: Adrien Mazarguil 

Acked-by: Jingjing Wu 


Re: [dpdk-dev] [PATCH v7 8/8] net/i40e: enable cloud filter for GTP-C and GTP-U

2017-10-05 Thread Wu, Jingjing


> -Original Message-
> From: Xing, Beilei
> Sent: Friday, September 29, 2017 11:51 PM
> To: Wu, Jingjing 
> Cc: Chilikin, Andrey ; dev@dpdk.org
> Subject: [PATCH v7 8/8] net/i40e: enable cloud filter for GTP-C and GTP-U
> 
> This patch sets TEID of GTP-C and GTP-U as filter type
> by replacing existed filter types inner_mac and TUNNEL_KEY.
> This configuration will be set when adding GTP-C or
> GTP-U filter rules, and it will be invalid only by
> NIC core reset.
> 
> Signed-off-by: Beilei Xing 
Acked-by: Jingjing Wu 



Re: [dpdk-dev] [PATCH v6 4/8] ethdev: add GTP items to support flow API

2017-10-05 Thread Wu, Jingjing


> -Original Message-
> From: Sean Harte [mailto:sea...@gmail.com]
> Sent: Tuesday, October 3, 2017 4:57 PM
> To: Adrien Mazarguil 
> Cc: Xing, Beilei ; Wu, Jingjing 
> ; Chilikin,
> Andrey ; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v6 4/8] ethdev: add GTP items to support flow 
> API
> 
> On 2 October 2017 at 13:27, Adrien Mazarguil  
> wrote:
> > On Fri, Sep 29, 2017 at 10:29:55AM +0100, Sean Harte wrote:
> >> On 29 September 2017 at 09:54, Xing, Beilei  wrote:
> > 
> >> >> >  /**
> >> >> > + * RTE_FLOW_ITEM_TYPE_GTP.
> >> >> > + *
> >> >> > + * Matches a GTPv1 header.
> >> >> > + */
> >> >> > +struct rte_flow_item_gtp {
> >> >> > +   /**
> >> >> > +* Version (3b), protocol type (1b), reserved (1b),
> >> >> > +* Extension header flag (1b),
> >> >> > +* Sequence number flag (1b),
> >> >> > +* N-PDU number flag (1b).
> >> >> > +*/
> >> >> > +   uint8_t v_pt_rsv_flags;
> >> >> > +   uint8_t msg_type; /**< Message type. */
> >> >> > +   rte_be16_t msg_len; /**< Message length. */
> >> >> > +   rte_be32_t teid; /**< Tunnel endpoint identifier. */ };
> >> >>
> >> >> In future, you might add support for GTPv2 (which is used since LTE).
> >> >> Maybe this structure should have v1 in its name to avoid confusion?
> >> >
> >> > I considered it before. But I think we can modify it when we support 
> >> > GTPv2 in future,
> and keep concise 'GTP' currently:)  since I have described it matches v1 
> header.
> >> >
> >>
> >> You could rename v_pt_rsv_flags to version_flags to avoid some future
> >> code changes to support GTPv2. There's still the issue that not all
> >> GTPv2 messages have a TEID though.
> >
> > Although they have the same size, the header of these two protocols
> > obviously differs. My suggestion would be to go with a separate GTPv2
> > pattern item using its own dedicated structure instead.
> >
> > --
> > Adrien Mazarguil
> > 6WIND
> 
> The 1st four bytes are the same (flags in first byte have different
> meanings, but the bits indicating the version are in the same
> location). After that, different fields in each version are optional,
> and the headers have variable size. A single structure could be used
> if the first field is renamed to something like "version_flags", and
> then check that the teid field in item->mask is not set if
> ((version_flags >> 5 == 2) && ((version_flags >> 4) & 1) == 1). If
> there's going to be two structures, it would be good to put v1 and v2
> in the names, in my opinion.

I think the name GTP is OK for now. Since v1 and v2 are different, why not
rename them when v2 support is introduced?






Re: [dpdk-dev] [PATCH v4 4/4] eventdev: Add tests for event eth Rx adapter APIs

2017-10-05 Thread Pavan Nikhilesh Bhagavatula
On Thu, Oct 05, 2017 at 11:27:53AM +0530, Rao, Nikhil wrote:
> On 10/3/2017 5:06 PM, Pavan Nikhilesh Bhagavatula wrote:
> >On Fri, Sep 22, 2017 at 02:47:14AM +0530, Nikhil Rao wrote:
> >
> >Hi Nikhil,
> >
> >
> >>Add unit tests for rte_event_eth_rx_adapter_xxx() APIs
> >>
> >>Signed-off-by: Nikhil Rao 
> >>---
> >>  test/test/test_event_eth_rx_adapter.c | 399 
> >> ++
> >>  test/test/Makefile|   1 +
> >>  2 files changed, 400 insertions(+)
> >>  create mode 100644 test/test/test_event_eth_rx_adapter.c
> >>
> >>diff --git a/test/test/test_event_eth_rx_adapter.c 
> >>b/test/test/test_event_eth_rx_adapter.c
> >>new file mode 100644
> >>index 0..5d448dc27
> >
> >>+
> >>+static int
> >>+testsuite_setup(void)
> >>+{
> >>+   int err;
> >>+   err = init_ports(rte_eth_dev_count());
> >>+   TEST_ASSERT(err == 0, "Port initialization failed err %d\n", err);
> >>+
> >>+   struct rte_event_dev_config config = {
> >>+   .nb_event_queues = 1,
> >>+   .nb_event_ports = 1,
> >>+   .nb_events_limit  = 4096,
> >>+   .nb_event_queue_flows = 1024,
> >>+   .nb_event_port_dequeue_depth = 16,
> >>+   .nb_event_port_enqueue_depth = 16
> >>+   };
> >>+
> >
> >Some eth devices like octeontx[1] use event device to receive packets, So in
> >this special case it would require to stop the event device before 
> >configuring
> >the event device as it is already started in port_init.
> >
> >Calling rte_event_dev_stop(0) here would satisfy such use case.
>
> Hi Pavan,
>
> port_init is starting the eth device not the event device.

If eth_octeontx is the eth device, it uses event_octeontx to work. So, when
rte_eth_dev_start is called in port_init, it invokes rte_event_dev_start
internally.

>
> Moving init_ports to after rte_event_dev_configure should also work ?

Yep, this works too.

>
> >
> >[1] http://dpdk.org/ml/archives/dev/2017-August/073982.html
> >
> >>+   err = rte_event_dev_configure(0, &config);
> >>+   TEST_ASSERT(err == 0, "Event device initialization failed err %d\n",
> >>+   err);
> >>+
> >>+   err = rte_event_eth_rx_adapter_caps_get(0, 0, &default_params.caps);
> >>+   TEST_ASSERT(err == 0, "Failed to get adapter cap err %d\n",
> >
> >
> >
> >>+
> >>+static int
> >>+adapter_queue_add_del(void)
> >>+{
> >>+   int err;
> >>+   struct rte_event ev;
> >>+   uint32_t cap;
> >>+
> >>+   struct rte_event_eth_rx_adapter_queue_conf queue_config;
> >>+
> >>+   err = rte_event_eth_rx_adapter_caps_get(0, 0, &cap);
> >>+   TEST_ASSERT(err == 0, "Expected 0 got %d", err);
> >>+
> >>+   ev.queue_id = 0;
> >>+   ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
> >>+   ev.priority = 0;
> >>+
> >>+   queue_config.rx_queue_flags = 0;
> >>+   if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_FLOW_ID)) {
> >>+   ev.flow_id = 1;
> >>+   queue_config.rx_queue_flags =
> >>+   RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID;
> >>+   }
> >>+   queue_config.ev = ev;
> >>+   queue_config.servicing_weight = 1;
> >>+
> >
> >As mentioned above[1] in case of HW accelerated coprocessors the eth_port has
> >to be stopped before reconfiguring the eth queue to event queue remapping.
> >Calling rte_eth_dev_stop(0) is required before trying to map the eth queue.
> >
>
> Is it possible to do this internally within the queue_add call ?

It is possible to handle this internally.
AFAIK it is a very specific case that exists when we are using eth_octeontx and
event_octeontx. So, I think this change is not required.

>
> If not, the application would call rte_eth_dev_stop() if
> RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT is set or do we need a separate
> capability for this ?
>
> >>+   err = rte_event_eth_rx_adapter_queue_add(0, rte_eth_dev_count(),
> >>+   -1, &queue_config);
> >>+   TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
> >>+
> >>+   if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_SINGLE_EVENTQ)) {
> >>+   err = rte_event_eth_rx_adapter_queue_add(0, 0, 0,
> >>+   &queue_config);
> >>+   TEST_ASSERT(err == 0, "Expected 0 got %d", err);
> >>+
> >>+   err = rte_event_eth_rx_adapter_queue_del(0, 0, 0);
> >>+   TEST_ASSERT(err == 0, "Expected 0 got %d", err);
> >>+
> >>+   err = rte_event_eth_rx_adapter_queue_add(0, 0, -1,
> >>+   &queue_config);
> >>+   TEST_ASSERT(err == 0, "Expected 0 got %d", err);
> >>+
> >>+   err = rte_event_eth_rx_adapter_queue_del(0, 0, -1);
> >>+   TEST_ASSERT(err == 0, "Expected 0 got %d", err);
> >>+   } else {
> >>+   err = rte_event_eth_rx_adapter_queue_add(0, 0, 0,
> >>+   &queue_config);
> >>+   TEST_ASSERT(err == -EINVAL, "Expected EINVAL got %d", err);
> >>+
> >>
> >
> >
> >Thanks,
> >Pavan

Re: [dpdk-dev] [PATCH v4 3/4] eventdev: Add eventdev ethernet Rx adapter

2017-10-05 Thread Rao, Nikhil

On 10/3/2017 7:22 PM, Jerin Jacob wrote:

-Original Message-

Date: Sun, 24 Sep 2017 23:46:51 +0530
From: "Rao, Nikhil" 
To: Jerin Jacob 
CC: bruce.richard...@intel.com, gage.e...@intel.com, dev@dpdk.org,
  tho...@monjalon.net, harry.van.haa...@intel.com, hemant.agra...@nxp.com,
  nipun.gu...@nxp.com, narender.vang...@intel.com,
  erik.g.carri...@intel.com, abhinandan.guj...@intel.com,
  santosh.shu...@caviumnetworks.com
Subject: Re: [PATCH v4 3/4] eventdev: Add eventdev ethernet Rx adapter
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101
  Thunderbird/52.3.0


OK, Thanks for the detailed review. Will add the programmer guide to RC1.


OK. Thanks.








Yes, if create() and queue_add() are called from different processes, it
wouldn't work.


+
+static uint8_t default_rss_key[] = {
+   0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+   0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+   0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+   0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+   0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
+};


Looks like the scope of this array is only for rte_event_eth_rx_adapter_init,
if so please move it to stack.


OK.




+static uint8_t *rss_key_be;


Can we remove this global variable add it in in adapter memory?



There is currently struct rte_event_eth_rx_adapter
**rte_event_eth_rx_adapter that is an array of pointers to the adapters.
rss_key_be points to memory after this array.

are you thinking of something like:

struct {
struct rte_event_eth_rx_adapter **rte_event_eth_rx_adapter
uint8_t *rss_key_be;
} global;


I was thinking, to hold 40B in struct rte_event_eth_rx_adapter for
rss_key_be and initialize per rx_adapter to avoid global variable
as fill_event_buffer() has access to rte_event_eth_rx_adapter.

Something like below as rough idea.
➜ [dpdk-next-eventdev] $ git diff
lib/librte_eventdev/rte_event_eth_rx_adapter.c
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
index cd19e7c28..ba6148931 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
@@ -37,6 +37,7 @@ struct rte_eth_event_enqueue_buffer {
  };
  
  struct rte_event_eth_rx_adapter {

+   uint8_t rss_key[40];
 /* event device identifier */
 uint8_t eventdev_id;
 /* per ethernet device structure */


OK.



+
+static int
+default_conf_cb(uint8_t id, uint8_t dev_id,
+   struct rte_event_eth_rx_adapter_conf *conf, void *arg)
+{
+
+   ret = rte_event_port_setup(dev_id, port_id, port_conf);
+   if (ret) {
+   RTE_EDEV_LOG_ERR("failed to setup event port %u\n",
+   port_id);


return or add goto to exit from here to avoid calling rte_event_dev_start below


Could do the return but I wanted to leave the device in the same state as it
was at entry into this function. Thoughts ?


Will calling rte_event_dev_start() down(in case if wont return) change
the state? if not, it is fine.



OK, will put in the return. if the device were configured with an 
additional port and the setup for this port fails. The 
rte_event_dev_start() call will dereference a NULL ptr.


Nikhil




No another comments. Looks good to me.






[dpdk-dev] [PATCH v8 3/7] ethdev: add GTP items to support flow API

2017-10-05 Thread Beilei Xing
This patch adds GTP, GTPC and GTPU items for
generic flow API, and also exposes item fields
through the flow command.

Signed-off-by: Beilei Xing 
Acked-by: Adrien Mazarguil 
Acked-by: Jingjing Wu 
---
 app/test-pmd/cmdline_flow.c | 40 ++
 app/test-pmd/config.c   |  3 ++
 doc/guides/prog_guide/rte_flow.rst  | 17 ++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  4 +++
 lib/librte_ether/rte_flow.h | 52 +
 5 files changed, 116 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index a17a004..26c3e4f 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -171,6 +171,10 @@ enum index {
ITEM_GRE_PROTO,
ITEM_FUZZY,
ITEM_FUZZY_THRESH,
+   ITEM_GTP,
+   ITEM_GTP_TEID,
+   ITEM_GTPC,
+   ITEM_GTPU,
 
/* Validate/create actions. */
ACTIONS,
@@ -451,6 +455,9 @@ static const enum index next_item[] = {
ITEM_MPLS,
ITEM_GRE,
ITEM_FUZZY,
+   ITEM_GTP,
+   ITEM_GTPC,
+   ITEM_GTPU,
ZERO,
 };
 
@@ -588,6 +595,12 @@ static const enum index item_gre[] = {
ZERO,
 };
 
+static const enum index item_gtp[] = {
+   ITEM_GTP_TEID,
+   ITEM_NEXT,
+   ZERO,
+};
+
 static const enum index next_action[] = {
ACTION_END,
ACTION_VOID,
@@ -1421,6 +1434,33 @@ static const struct token token_list[] = {
.args = ARGS(ARGS_ENTRY(struct rte_flow_item_fuzzy,
thresh)),
},
+   [ITEM_GTP] = {
+   .name = "gtp",
+   .help = "match GTP header",
+   .priv = PRIV_ITEM(GTP, sizeof(struct rte_flow_item_gtp)),
+   .next = NEXT(item_gtp),
+   .call = parse_vc,
+   },
+   [ITEM_GTP_TEID] = {
+   .name = "teid",
+   .help = "tunnel endpoint identifier",
+   .next = NEXT(item_gtp, NEXT_ENTRY(UNSIGNED), item_param),
+   .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_gtp, teid)),
+   },
+   [ITEM_GTPC] = {
+   .name = "gtpc",
+   .help = "match GTP header",
+   .priv = PRIV_ITEM(GTPC, sizeof(struct rte_flow_item_gtp)),
+   .next = NEXT(item_gtp),
+   .call = parse_vc,
+   },
+   [ITEM_GTPU] = {
+   .name = "gtpu",
+   .help = "match GTP header",
+   .priv = PRIV_ITEM(GTPU, sizeof(struct rte_flow_item_gtp)),
+   .next = NEXT(item_gtp),
+   .call = parse_vc,
+   },
 
/* Validate/create actions. */
[ACTIONS] = {
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 60a8d07..4ec8f0d 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -952,6 +952,9 @@ static const struct {
MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)),
MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)),
MK_FLOW_ITEM(FUZZY, sizeof(struct rte_flow_item_fuzzy)),
+   MK_FLOW_ITEM(GTP, sizeof(struct rte_flow_item_gtp)),
+   MK_FLOW_ITEM(GTPC, sizeof(struct rte_flow_item_gtp)),
+   MK_FLOW_ITEM(GTPU, sizeof(struct rte_flow_item_gtp)),
 };
 
 /** Compute storage space needed by item specification. */
diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 662a912..73f12ee 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -955,6 +955,23 @@ Usage example, fuzzy match a TCPv4 packets:
| 4 | END  |
+---+--+
 
+Item: ``GTP``, ``GTPC``, ``GTPU``
+^
+
+Matches a GTPv1 header.
+
+Note: GTP, GTPC and GTPU use the same structure. GTPC and GTPU item
+are defined for a user-friendly API when creating GTP-C and GTP-U
+flow rules.
+
+- ``v_pt_rsv_flags``: version (3b), protocol type (1b), reserved (1b),
+  extension header flag (1b), sequence number flag (1b), N-PDU number
+  flag (1b).
+- ``msg_type``: message type.
+- ``msg_len``: message length.
+- ``teid``: tunnel endpoint identifier.
+- Default ``mask`` matches teid only.
+
 Actions
 ~~~
 
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst 
b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index aeef3e1..32223ca 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -2721,6 +2721,10 @@ This section lists supported pattern items and their 
attributes, if any.
 
   - ``thresh {unsigned}``: accuracy threshold.
 
+- ``gtp``, ``gtpc``, ``gtpu``: match GTPv1 header.
+
+  - ``teid {unsigned}``: tunnel endpoint identifier.
+
 Actions list
 
 
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index bba6169..b1a1b97 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -309,6 +309,33 @@ enum rte_flow_item_type {
   

[dpdk-dev] [PATCH v8 0/7] net/i40e: GTP-C and GTP-U enabling

2017-10-05 Thread Beilei Xing
This patch set enables RSS/FDIR/cloud filter for GTP-C and GTP-U.

v8 changes:
 - Remove 'enable RSS for new pctype' as it can be set with the
   configuration in Kirill's patch.
 - Resolve conflicts.

v7 changes:
 - Distinguish GTP-C request and response message in mbuf description.
 - Clarify GTP-C response message is not supported.
 - Version_type 0x30 is invalid for GTP-C, replace with 0x32.
 - Refine metadata parsing function.
 - Rework for checking fdir programming status.

v6 changes:
 - Reword description of GTP item and GTP structure, mainly support
   GTPv1, not include GTPv0 and GTPv2.

v5 changes:
 - Fix code style.
 - Reword commit log.

v4 changes:
 - Refine fdir related code.
 - Rework profile metadata parsing function.
 - Fix code style.

v3 changes:
 - Rework implementation to support the new profile.
 - Add GTPC and GTPU tunnel type in software packet type parser.
 - Update ptype info when loading profile.
 - Fix bug of updating pctype info.


v2 changes:
 - Enable RSS/FDIR/cloud filter dynamically by checking profile
 - Add GTPC and GTPU items to distinguish rule for GTP-C or GTP-U
 - Rework FDIR/cloud filter enabling function

Beilei Xing (7):
  mbuf: support GTP in software packet type parser
  net/i40e: update ptype and pctype info
  ethdev: add GTP items to support flow API
  net/i40e: finish integration FDIR with generic flow API
  net/i40e: add FDIR support for GTP-C and GTP-U
  net/i40e: add cloud filter parsing function for GTP
  net/i40e: enable cloud filter for GTP-C and GTP-U

 app/test-pmd/cmdline_flow.c |  40 ++
 app/test-pmd/config.c   |   3 +
 doc/guides/prog_guide/rte_flow.rst  |  17 +
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |   4 +
 drivers/net/i40e/i40e_ethdev.c  | 505 +++-
 drivers/net/i40e/i40e_ethdev.h  | 156 +++-
 drivers/net/i40e/i40e_fdir.c| 585 +++-
 drivers/net/i40e/i40e_flow.c| 503 
 drivers/net/i40e/rte_pmd_i40e.c |   6 +-
 lib/librte_ether/rte_flow.h |  52 +++
 lib/librte_mbuf/rte_mbuf_ptype.c|   2 +
 lib/librte_mbuf/rte_mbuf_ptype.h|  32 ++
 12 files changed, 1774 insertions(+), 131 deletions(-)

-- 
2.5.5



[dpdk-dev] [PATCH v8 1/7] mbuf: support GTP in software packet type parser

2017-10-05 Thread Beilei Xing
Add support of GTP-C and GTP-U tunnels in rte_net_get_ptype().

Signed-off-by: Beilei Xing 
Acked-by: Olivier Matz 
---
 lib/librte_mbuf/rte_mbuf_ptype.c |  2 ++
 lib/librte_mbuf/rte_mbuf_ptype.h | 32 
 2 files changed, 34 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c b/lib/librte_mbuf/rte_mbuf_ptype.c
index e5c4fae..a450814 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.c
+++ b/lib/librte_mbuf/rte_mbuf_ptype.c
@@ -89,6 +89,8 @@ const char *rte_get_ptype_tunnel_name(uint32_t ptype)
case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE";
case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE";
case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT";
+   case RTE_PTYPE_TUNNEL_GTPC: return "TUNNEL_GTPC";
+   case RTE_PTYPE_TUNNEL_GTPU: return "TUNNEL_GTPU";
default: return "TUNNEL_UNKNOWN";
}
 }
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
index acd70bb..978c4a2 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.h
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -383,6 +383,38 @@ extern "C" {
  */
 #define RTE_PTYPE_TUNNEL_GRENAT 0x6000
 /**
+ * GTP-C (GPRS Tunnelling Protocol) control tunneling packet type.
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=2123>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=2123>
+ * or,
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'source port'=2123>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'source port'=2123>
+ */
+#define RTE_PTYPE_TUNNEL_GTPC   0x7000
+/**
+ * GTP-U (GPRS Tunnelling Protocol) user data tunneling packet type.
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=2152>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=2152>
+ */
+#define RTE_PTYPE_TUNNEL_GTPU   0x8000
+/**
  * Mask of tunneling packet types.
  */
 #define RTE_PTYPE_TUNNEL_MASK   0xf000
-- 
2.5.5



[dpdk-dev] [PATCH v8 5/7] net/i40e: add FDIR support for GTP-C and GTP-U

2017-10-05 Thread Beilei Xing
This patch adds FDIR support for GTP-C and GTP-U. The
input set of GTP-C and GTP-U is TEID.

Signed-off-by: Beilei Xing 
---
 drivers/net/i40e/i40e_ethdev.h |  30 +
 drivers/net/i40e/i40e_fdir.c   | 216 -
 drivers/net/i40e/i40e_flow.c   | 267 +++--
 3 files changed, 415 insertions(+), 98 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index ef4c503..9cd2795 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -460,6 +460,25 @@ struct i40e_vmdq_info {
 #define I40E_FLEX_WORD_MASK(off) (0x80 >> (off))
 #define I40E_FDIR_IPv6_TC_OFFSET   20
 
+/* A structure used to define the input for GTP flow */
+struct i40e_gtp_flow {
+   struct rte_eth_udpv4_flow udp; /* IPv4 UDP fields to match. */
+   uint8_t msg_type;  /* Message type. */
+   uint32_t teid; /* TEID in big endian. */
+};
+
+/* A structure used to define the input for GTP IPV4 flow */
+struct i40e_gtp_ipv4_flow {
+   struct i40e_gtp_flow gtp;
+   struct rte_eth_ipv4_flow ip4;
+};
+
+/* A structure used to define the input for GTP IPV6 flow */
+struct i40e_gtp_ipv6_flow {
+   struct i40e_gtp_flow gtp;
+   struct rte_eth_ipv6_flow ip6;
+};
+
 /*
  * A union contains the inputs for all types of flow
  * items in flows need to be in big endian
@@ -474,6 +493,14 @@ union i40e_fdir_flow {
struct rte_eth_tcpv6_flow  tcp6_flow;
struct rte_eth_sctpv6_flow sctp6_flow;
struct rte_eth_ipv6_flow   ipv6_flow;
+   struct i40e_gtp_flow   gtp_flow;
+   struct i40e_gtp_ipv4_flow  gtp_ipv4_flow;
+   struct i40e_gtp_ipv6_flow  gtp_ipv6_flow;
+};
+
+enum i40e_fdir_ip_type {
+   I40E_FDIR_IPTYPE_IPV4,
+   I40E_FDIR_IPTYPE_IPV6,
 };
 
 /* A structure used to contain extend input of flow */
@@ -483,6 +510,9 @@ struct i40e_fdir_flow_ext {
/* It is filled by the flexible payload to match. */
uint8_t is_vf;   /* 1 for VF, 0 for port dev */
uint16_t dst_id; /* VF ID, available when is_vf is 1*/
+   bool inner_ip;   /* If there is inner ip */
+   enum i40e_fdir_ip_type iip_type; /* ip type for inner ip */
+   bool customized_pctype; /* If customized pctype is used */
 };
 
 /* A structure used to define the input for a flow director filter entry */
diff --git a/drivers/net/i40e/i40e_fdir.c b/drivers/net/i40e/i40e_fdir.c
index 7b16584..7c46578 100644
--- a/drivers/net/i40e/i40e_fdir.c
+++ b/drivers/net/i40e/i40e_fdir.c
@@ -71,6 +71,16 @@
 #define I40E_FDIR_IPv6_DEFAULT_HOP_LIMITS   0xFF
 #define I40E_FDIR_IPv6_PAYLOAD_LEN  380
 #define I40E_FDIR_UDP_DEFAULT_LEN   400
+#define I40E_FDIR_GTP_DEFAULT_LEN   384
+#define I40E_FDIR_INNER_IP_DEFAULT_LEN  384
+#define I40E_FDIR_INNER_IPV6_DEFAULT_LEN344
+
+#define I40E_FDIR_GTPC_DST_PORT 2123
+#define I40E_FDIR_GTPU_DST_PORT 2152
+#define I40E_FDIR_GTP_VER_FLAG_0X30 0x30
+#define I40E_FDIR_GTP_VER_FLAG_0X32 0x32
+#define I40E_FDIR_GTP_MSG_TYPE_0X01 0x01
+#define I40E_FDIR_GTP_MSG_TYPE_0XFF 0xFF
 
 /* Wait time for fdir filter programming */
 #define I40E_FDIR_MAX_WAIT_US 1
@@ -938,16 +948,34 @@ i40e_fdir_construct_pkt(struct i40e_pf *pf,
return 0;
 }
 
+static struct i40e_customized_pctype *
+i40e_flow_fdir_find_customized_pctype(struct i40e_pf *pf, uint8_t pctype)
+{
+   struct i40e_customized_pctype *cus_pctype;
+   enum i40e_new_pctype i = I40E_CUSTOMIZED_GTPC;
+
+   for (; i < I40E_CUSTOMIZED_MAX; i++) {
+   cus_pctype = &pf->customized_pctype[i];
+   if (pctype == cus_pctype->pctype)
+   return cus_pctype;
+   }
+   return NULL;
+}
+
 static inline int
-i40e_flow_fdir_fill_eth_ip_head(const struct i40e_fdir_input *fdir_input,
+i40e_flow_fdir_fill_eth_ip_head(struct i40e_pf *pf,
+   const struct i40e_fdir_input *fdir_input,
unsigned char *raw_pkt,
bool vlan)
 {
+   struct i40e_customized_pctype *cus_pctype = NULL;
static uint8_t vlan_frame[] = {0x81, 0, 0, 0};
uint16_t *ether_type;
uint8_t len = 2 * sizeof(struct ether_addr);
struct ipv4_hdr *ip;
struct ipv6_hdr *ip6;
+   uint8_t pctype = fdir_input->pctype;
+   bool is_customized_pctype = fdir_input->flow_ext.customized_pctype;
static const uint8_t next_proto[] = {
[I40E_FILTER_PCTYPE_FRAG_IPV4] = IPPROTO_IP,
[I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = IPPROTO_TCP,
@@ -974,27 +1002,32 @@ i40e_flow_fdir_fill_eth_ip_head(const struct 
i40e_fdir_input *fdir_input,
raw_pkt += sizeof(uint16_t);
len += sizeof(uint16_t);
 
-   switch (fdir_input->pctype) {
-   case I40E_FILTER_PCTYPE_L2_PAYLOAD:
+   if (is_customized_pctype) {
+   cus

[dpdk-dev] [PATCH v8 4/7] net/i40e: finish integration FDIR with generic flow API

2017-10-05 Thread Beilei Xing
rte_eth_fdir_* structures are still used in FDIR functions.
This patch adds i40e private FDIR related structures and
functions to finish integration FDIR with generic flow API.

Signed-off-by: Beilei Xing 
---
 drivers/net/i40e/i40e_ethdev.h |  83 ++-
 drivers/net/i40e/i40e_fdir.c   | 487 +++--
 drivers/net/i40e/i40e_flow.c   |  77 +++
 3 files changed, 584 insertions(+), 63 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 9688ea8..ef4c503 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -461,6 +461,80 @@ struct i40e_vmdq_info {
 #define I40E_FDIR_IPv6_TC_OFFSET   20
 
 /*
+ * A union contains the inputs for all types of flow
+ * items in flows need to be in big endian
+ */
+union i40e_fdir_flow {
+   struct rte_eth_l2_flow l2_flow;
+   struct rte_eth_udpv4_flow  udp4_flow;
+   struct rte_eth_tcpv4_flow  tcp4_flow;
+   struct rte_eth_sctpv4_flow sctp4_flow;
+   struct rte_eth_ipv4_flow   ip4_flow;
+   struct rte_eth_udpv6_flow  udp6_flow;
+   struct rte_eth_tcpv6_flow  tcp6_flow;
+   struct rte_eth_sctpv6_flow sctp6_flow;
+   struct rte_eth_ipv6_flow   ipv6_flow;
+};
+
+/* A structure used to contain extend input of flow */
+struct i40e_fdir_flow_ext {
+   uint16_t vlan_tci;
+   uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN];
+   /* It is filled by the flexible payload to match. */
+   uint8_t is_vf;   /* 1 for VF, 0 for port dev */
+   uint16_t dst_id; /* VF ID, available when is_vf is 1*/
+};
+
+/* A structure used to define the input for a flow director filter entry */
+struct i40e_fdir_input {
+   enum i40e_filter_pctype pctype;
+   union i40e_fdir_flow flow;
+   /* Flow fields to match, dependent on flow_type */
+   struct i40e_fdir_flow_ext flow_ext;
+   /* Additional fields to match */
+};
+
+/* Behavior will be taken if FDIR match */
+enum i40e_fdir_behavior {
+   I40E_FDIR_ACCEPT = 0,
+   I40E_FDIR_REJECT,
+   I40E_FDIR_PASSTHRU,
+};
+
+/* Flow director report status
+ * It defines what will be reported if FDIR entry is matched.
+ */
+enum i40e_fdir_status {
+   I40E_FDIR_NO_REPORT_STATUS = 0, /* Report nothing. */
+   I40E_FDIR_REPORT_ID,/* Only report FD ID. */
+   I40E_FDIR_REPORT_ID_FLEX_4, /* Report FD ID and 4 flex bytes. */
+   I40E_FDIR_REPORT_FLEX_8,/* Report 8 flex bytes. */
+};
+
+/* A structure used to define an action when match FDIR packet filter. */
+struct i40e_fdir_action {
+   uint16_t rx_queue;/* Queue assigned to if FDIR match. */
+   enum i40e_fdir_behavior behavior; /* Behavior will be taken */
+   enum i40e_fdir_status report_status;  /* Status report option */
+   /* If report_status is I40E_FDIR_REPORT_ID_FLEX_4 or
+* I40E_FDIR_REPORT_FLEX_8, flex_off specifies where the reported
+* flex bytes start from in flexible payload.
+*/
+   uint8_t flex_off;
+};
+
+/* A structure used to define the flow director filter entry by filter_ctrl API
+ * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_ADD and
+ * RTE_ETH_FILTER_DELETE operations.
+ */
+struct i40e_fdir_filter_conf {
+   uint32_t soft_id;
+   /* ID, an unique value is required when deal with FDIR entry */
+   struct i40e_fdir_input input;/* Input set */
+   struct i40e_fdir_action action;  /* Action taken when match */
+};
+
+/*
  * Structure to store flex pit for flow diretor.
  */
 struct i40e_fdir_flex_pit {
@@ -484,7 +558,7 @@ struct i40e_fdir_flex_mask {
 
 struct i40e_fdir_filter {
TAILQ_ENTRY(i40e_fdir_filter) rules;
-   struct rte_eth_fdir_filter fdir;
+   struct i40e_fdir_filter_conf fdir;
 };
 
 TAILQ_HEAD(i40e_fdir_filter_list, i40e_fdir_filter);
@@ -913,7 +987,7 @@ extern const struct rte_flow_ops i40e_flow_ops;
 
 union i40e_filter_t {
struct rte_eth_ethertype_filter ethertype_filter;
-   struct rte_eth_fdir_filter fdir_filter;
+   struct i40e_fdir_filter_conf fdir_filter;
struct rte_eth_tunnel_filter_conf tunnel_filter;
struct i40e_tunnel_filter_conf consistent_tunnel_filter;
 };
@@ -990,7 +1064,7 @@ i40e_sw_ethertype_filter_lookup(struct i40e_ethertype_rule 
*ethertype_rule,
 int i40e_sw_ethertype_filter_del(struct i40e_pf *pf,
 struct i40e_ethertype_filter_input *input);
 int i40e_sw_fdir_filter_del(struct i40e_pf *pf,
-   struct rte_eth_fdir_input *input);
+   struct i40e_fdir_input *input);
 struct i40e_tunnel_filter *
 i40e_sw_tunnel_filter_lookup(struct i40e_tunnel_rule *tunnel_rule,
 const struct i40e_tunnel_filter_input *input);
@@ -1003,6 +1077,9 @@ int i40e_ethertype_filter_set(struct i40e_pf *pf,
 int i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
 const struct rte_eth_fdir_filter

[dpdk-dev] [PATCH v8 6/7] net/i40e: add cloud filter parsing function for GTP

2017-10-05 Thread Beilei Xing
This patch adds i40e_flow_parse_gtp_filter parsing
function for GTP-C and GTP-U to support cloud filter.

Signed-off-by: Beilei Xing 
Acked-by: Jingjing Wu 
---
 drivers/net/i40e/i40e_ethdev.h |   2 +
 drivers/net/i40e/i40e_flow.c   | 153 +
 2 files changed, 155 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 9cd2795..2b2ef69 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -704,6 +704,8 @@ enum i40e_tunnel_type {
I40E_TUNNEL_TYPE_MPLSoUDP,
I40E_TUNNEL_TYPE_MPLSoGRE,
I40E_TUNNEL_TYPE_QINQ,
+   I40E_TUNNEL_TYPE_GTPC,
+   I40E_TUNNEL_TYPE_GTPU,
I40E_TUNNEL_TYPE_MAX,
 };
 
diff --git a/drivers/net/i40e/i40e_flow.c b/drivers/net/i40e/i40e_flow.c
index 370c93b..9470ff5 100644
--- a/drivers/net/i40e/i40e_flow.c
+++ b/drivers/net/i40e/i40e_flow.c
@@ -125,6 +125,12 @@ static int i40e_flow_parse_mpls_filter(struct rte_eth_dev 
*dev,
   const struct rte_flow_action actions[],
   struct rte_flow_error *error,
   union i40e_filter_t *filter);
+static int i40e_flow_parse_gtp_filter(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error,
+ union i40e_filter_t *filter);
 static int i40e_flow_destroy_ethertype_filter(struct i40e_pf *pf,
  struct i40e_ethertype_filter *filter);
 static int i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf,
@@ -1808,6 +1814,11 @@ static struct i40e_valid_pattern 
i40e_supported_patterns[] = {
{ pattern_mpls_2, i40e_flow_parse_mpls_filter },
{ pattern_mpls_3, i40e_flow_parse_mpls_filter },
{ pattern_mpls_4, i40e_flow_parse_mpls_filter },
+   /* GTP-C & GTP-U */
+   { pattern_fdir_ipv4_gtpc, i40e_flow_parse_gtp_filter },
+   { pattern_fdir_ipv4_gtpu, i40e_flow_parse_gtp_filter },
+   { pattern_fdir_ipv6_gtpc, i40e_flow_parse_gtp_filter },
+   { pattern_fdir_ipv6_gtpu, i40e_flow_parse_gtp_filter },
/* QINQ */
{ pattern_qinq_1, i40e_flow_parse_qinq_filter },
 };
@@ -3825,6 +3836,148 @@ i40e_flow_parse_mpls_filter(struct rte_eth_dev *dev,
 }
 
 /* 1. Last in item should be NULL as range is not supported.
+ * 2. Supported filter types: GTP TEID.
+ * 3. Mask of fields which need to be matched should be
+ *filled with 1.
+ * 4. Mask of fields which needn't to be matched should be
+ *filled with 0.
+ * 5. GTP profile supports GTPv1 only.
+ * 6. GTP-C response message ('source_port' = 2123) is not supported.
+ */
+static int
+i40e_flow_parse_gtp_pattern(struct rte_eth_dev *dev,
+   const struct rte_flow_item *pattern,
+   struct rte_flow_error *error,
+   struct i40e_tunnel_filter_conf *filter)
+{
+   struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+   const struct rte_flow_item *item = pattern;
+   const struct rte_flow_item_gtp *gtp_spec;
+   const struct rte_flow_item_gtp *gtp_mask;
+   enum rte_flow_item_type item_type;
+
+   if (!pf->gtp_support) {
+   rte_flow_error_set(error, EINVAL,
+  RTE_FLOW_ERROR_TYPE_ITEM,
+  item,
+  "GTP is not supported by default.");
+   return -rte_errno;
+   }
+
+   for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+   if (item->last) {
+   rte_flow_error_set(error, EINVAL,
+  RTE_FLOW_ERROR_TYPE_ITEM,
+  item,
+  "Not support range");
+   return -rte_errno;
+   }
+   item_type = item->type;
+   switch (item_type) {
+   case RTE_FLOW_ITEM_TYPE_ETH:
+   if (item->spec || item->mask) {
+   rte_flow_error_set(error, EINVAL,
+  RTE_FLOW_ERROR_TYPE_ITEM,
+  item,
+  "Invalid ETH item");
+   return -rte_errno;
+   }
+   break;
+   case RTE_FLOW_ITEM_TYPE_IPV4:
+   filter->ip_type = I40E_TUNNEL_IPTYPE_IPV4;
+   /* IPv4 is used to describe protocol,
+* spec and mask should be NULL.
+*/
+

[dpdk-dev] [PATCH v8 2/7] net/i40e: update ptype and pctype info

2017-10-05 Thread Beilei Xing
Update new packet type and new pctype info when downloading
profile.

Signed-off-by: Beilei Xing 
Acked-by: Jingjing Wu 
---
 drivers/net/i40e/i40e_ethdev.c  | 312 
 drivers/net/i40e/i40e_ethdev.h  |  24 
 drivers/net/i40e/rte_pmd_i40e.c |   6 +-
 3 files changed, 341 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 0b151a0..3295da0 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -65,6 +65,7 @@
 #include "i40e_rxtx.h"
 #include "i40e_pf.h"
 #include "i40e_regs.h"
+#include "rte_pmd_i40e.h"
 
 #define ETH_I40E_FLOATING_VEB_ARG  "enable_floating_veb"
 #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
@@ -1042,6 +1043,21 @@ i40e_init_fdir_filter_list(struct rte_eth_dev *dev)
return ret;
 }
 
+static void
+i40e_init_customized_info(struct i40e_pf *pf)
+{
+   int i;
+
+   /* Initialize customized pctype */
+   for (i = I40E_CUSTOMIZED_GTPC; i < I40E_CUSTOMIZED_MAX; i++) {
+   pf->customized_pctype[i].index = i;
+   pf->customized_pctype[i].pctype = I40E_FILTER_PCTYPE_INVALID;
+   pf->customized_pctype[i].valid = false;
+   }
+
+   pf->gtp_support = false;
+}
+
 static int
 eth_i40e_dev_init(struct rte_eth_dev *dev)
 {
@@ -1308,6 +1324,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
/* initialize Traffic Manager configuration */
i40e_tm_conf_init(dev);
 
+   /* Initialize customized information */
+   i40e_init_customized_info(pf);
+
ret = i40e_init_ethtype_filter_list(dev);
if (ret < 0)
goto err_init_ethtype_filter_list;
@@ -10769,6 +10788,299 @@ is_i40e_supported(struct rte_eth_dev *dev)
return is_device_supported(dev, &rte_i40e_pmd);
 }
 
+struct i40e_customized_pctype*
+i40e_find_customized_pctype(struct i40e_pf *pf, uint8_t index)
+{
+   int i;
+
+   for (i = 0; i < I40E_CUSTOMIZED_MAX; i++) {
+   if (pf->customized_pctype[i].index == index)
+   return &pf->customized_pctype[i];
+   }
+   return NULL;
+}
+
+static int
+i40e_update_customized_pctype(struct rte_eth_dev *dev, uint8_t *pkg,
+ uint32_t pkg_size, uint32_t proto_num,
+ struct rte_pmd_i40e_proto_info *proto)
+{
+   struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+   uint32_t pctype_num;
+   struct rte_pmd_i40e_ptype_info *pctype;
+   uint32_t buff_size;
+   struct i40e_customized_pctype *new_pctype = NULL;
+   uint8_t proto_id;
+   uint8_t pctype_value;
+   char name[64];
+   uint32_t i, j, n;
+   int ret;
+
+   ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
+   (uint8_t *)&pctype_num, sizeof(pctype_num),
+   RTE_PMD_I40E_PKG_INFO_PCTYPE_NUM);
+   if (ret) {
+   PMD_DRV_LOG(ERR, "Failed to get pctype number");
+   return -1;
+   }
+   if (!pctype_num) {
+   PMD_DRV_LOG(INFO, "No new pctype added");
+   return -1;
+   }
+
+   buff_size = pctype_num * sizeof(struct rte_pmd_i40e_proto_info);
+   pctype = rte_zmalloc("new_pctype", buff_size, 0);
+   if (!pctype) {
+   PMD_DRV_LOG(ERR, "Failed to allocate memory");
+   return -1;
+   }
+   /* get information about new pctype list */
+   ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
+   (uint8_t *)pctype, buff_size,
+   RTE_PMD_I40E_PKG_INFO_PCTYPE_LIST);
+   if (ret) {
+   PMD_DRV_LOG(ERR, "Failed to get pctype list");
+   rte_free(pctype);
+   return -1;
+   }
+
+   /* Update customized pctype. */
+   for (i = 0; i < pctype_num; i++) {
+   pctype_value = pctype[i].ptype_id;
+   memset(name, 0, sizeof(name));
+   for (j = 0; j < RTE_PMD_I40E_PROTO_NUM; j++) {
+   proto_id = pctype[i].protocols[j];
+   if (proto_id == RTE_PMD_I40E_PROTO_UNUSED)
+   continue;
+   for (n = 0; n < proto_num; n++) {
+   if (proto[n].proto_id != proto_id)
+   continue;
+   strcat(name, proto[n].name);
+   strcat(name, "_");
+   break;
+   }
+   }
+   name[strlen(name) - 1] = '\0';
+   if (!strcmp(name, "GTPC"))
+   new_pctype =
+   i40e_find_customized_pctype(pf,
+ I40E_CUSTOMIZED_GTPC);
+   else if (!strcmp(name, "GTPU_IPV4"))
+   new_pctype

[dpdk-dev] [PATCH v8 7/7] net/i40e: enable cloud filter for GTP-C and GTP-U

2017-10-05 Thread Beilei Xing
This patch sets TEID of GTP-C and GTP-U as filter type
by replacing existed filter types inner_mac and TUNNEL_KEY.
This configuration will be set when adding GTP-C or
GTP-U filter rules, and it will be invalid only by
NIC core reset.

Signed-off-by: Beilei Xing 
Acked-by: Jingjing Wu 
---
 drivers/net/i40e/i40e_ethdev.c | 193 +
 drivers/net/i40e/i40e_ethdev.h |  17 ++--
 drivers/net/i40e/i40e_flow.c   |  12 +--
 3 files changed, 191 insertions(+), 31 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 3295da0..0b3b1fb 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -7069,7 +7069,7 @@ i40e_status_code i40e_replace_mpls_l1_filter(struct 
i40e_pf *pf)
/* create L1 filter */
filter_replace.old_filter_type =
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC;
-   filter_replace.new_filter_type = I40E_AQC_ADD_L1_FILTER_TEID_MPLS;
+   filter_replace.new_filter_type = I40E_AQC_ADD_L1_FILTER_0X11;
filter_replace.tr_bit = 0;
 
/* Prepare the buffer, 3 entries */
@@ -7117,12 +7117,12 @@ i40e_status_code i40e_replace_mpls_cloud_filter(struct 
i40e_pf *pf)
I40E_AQC_MIRROR_CLOUD_FILTER;
filter_replace.old_filter_type = I40E_AQC_ADD_CLOUD_FILTER_IIP;
filter_replace.new_filter_type =
-   I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoUDP;
+   I40E_AQC_ADD_CLOUD_FILTER_0X11;
/* Prepare the buffer, 2 entries */
filter_replace_buf.data[0] = I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG;
filter_replace_buf.data[0] |=
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
-   filter_replace_buf.data[4] = I40E_AQC_ADD_L1_FILTER_TEID_MPLS;
+   filter_replace_buf.data[4] = I40E_AQC_ADD_L1_FILTER_0X11;
filter_replace_buf.data[4] |=
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
@@ -7140,12 +7140,131 @@ i40e_status_code i40e_replace_mpls_cloud_filter(struct 
i40e_pf *pf)
I40E_AQC_MIRROR_CLOUD_FILTER;
filter_replace.old_filter_type = I40E_AQC_ADD_CLOUD_FILTER_IMAC;
filter_replace.new_filter_type =
-   I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoGRE;
+   I40E_AQC_ADD_CLOUD_FILTER_0X12;
/* Prepare the buffer, 2 entries */
filter_replace_buf.data[0] = I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG;
filter_replace_buf.data[0] |=
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
-   filter_replace_buf.data[4] = I40E_AQC_ADD_L1_FILTER_TEID_MPLS;
+   filter_replace_buf.data[4] = I40E_AQC_ADD_L1_FILTER_0X11;
+   filter_replace_buf.data[4] |=
+   I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+
+   status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
+  &filter_replace_buf);
+   return status;
+}
+
+static enum i40e_status_code
+i40e_replace_gtp_l1_filter(struct i40e_pf *pf)
+{
+   struct i40e_aqc_replace_cloud_filters_cmd  filter_replace;
+   struct i40e_aqc_replace_cloud_filters_cmd_buf  filter_replace_buf;
+   struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+   enum i40e_status_code status = I40E_SUCCESS;
+
+   /* For GTP-C */
+   memset(&filter_replace, 0,
+  sizeof(struct i40e_aqc_replace_cloud_filters_cmd));
+   memset(&filter_replace_buf, 0,
+  sizeof(struct i40e_aqc_replace_cloud_filters_cmd_buf));
+   /* create L1 filter */
+   filter_replace.old_filter_type =
+   I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC;
+   filter_replace.new_filter_type = I40E_AQC_ADD_L1_FILTER_0X12;
+   filter_replace.tr_bit = I40E_AQC_NEW_TR_22 |
+   I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+   /* Prepare the buffer, 2 entries */
+   filter_replace_buf.data[0] =
+   I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TEID_WORD0;
+   filter_replace_buf.data[0] |=
+   I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+   filter_replace_buf.data[2] = 0xFF;
+   filter_replace_buf.data[3] = 0xFF;
+   filter_replace_buf.data[4] =
+   I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TEID_WORD1;
+   filter_replace_buf.data[4] |=
+   I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+   filter_replace_buf.data[6] = 0xFF;
+   filter_replace_buf.data[7] = 0xFF;
+   status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
+  &filter_replace_buf);
+   if (status < 0)
+   return status;
+
+   /* for GTP-U */
+   memset(&filter_replace, 0,
+  sizeof(struct i40e_aqc_replace_cloud_filters_cmd));
+   memset(&filter_replace_buf, 0,
+  sizeof(struct i40e_aqc_replace_cloud_filters_cmd_buf));
+   /* create L1 filter */
+   filter_replace.old_filter_type =
+  

Re: [dpdk-dev] [PATCH] test/crypto: fix dpaa2_sec macros and definitions

2017-10-05 Thread De Lara Guarch, Pablo
Hi Akhil,

> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Akhil Goyal
> Sent: Tuesday, October 3, 2017 10:29 AM
> To: dev@dpdk.org
> Cc: Doherty, Declan ; De Lara Guarch, Pablo
> ; hemant.agra...@nxp.com;
> sta...@dpdk.org; Akhil Goyal 
> Subject: [dpdk-dev] [PATCH] test/crypto: fix dpaa2_sec macros and
> definitions
> 
> Fixes: 7a364faef185 ("cryptodev: remove crypto device type enumeration")
> 
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Akhil Goyal 
> ---
>  test/test/test_cryptodev.c  | 2 +-
>  test/test/test_cryptodev_perf.c | 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/test/test/test_cryptodev.c b/test/test/test_cryptodev.c index
> a4116c6..132f99c 100644
> --- a/test/test/test_cryptodev.c
> +++ b/test/test/test_cryptodev.c
> @@ -1816,7 +1816,7 @@ test_authonly_dpaa2_sec_all(void)
>   ts_params->session_mpool,
>   ts_params->valid_devs[0],
>   rte_cryptodev_driver_id_get(
> - RTE_STR(RTE_CRYPTODEV_DPAA2_SEC_PMD)),
> + RTE_STR(CRYPTODEV_NAME_DPAA2_SEC_PMD)),
>   BLKCIPHER_AUTHONLY_TYPE);
> 
>   TEST_ASSERT_EQUAL(status, 0, "Test failed"); diff --git
> a/test/test/test_cryptodev_perf.c b/test/test/test_cryptodev_perf.c index
> 3b57e6d..5df2e6e 100644
> --- a/test/test/test_cryptodev_perf.c
> +++ b/test/test/test_cryptodev_perf.c
> @@ -211,7 +211,7 @@ static const char *pmd_name(uint8_t driver_id)  {
>   uint8_t null_pmd = rte_cryptodev_driver_id_get(
>   RTE_STR(CRYPTODEV_NAME_NULL_PMD));
> - uint8_t dpaa2_pmd = rte_cryptodev_driver_id_get(
> + uint8_t dpaa2_sec_pmd = rte_cryptodev_driver_id_get(

I think, in order to keep consistency, you should make this change also in 
test_cryptodev.c:

test/test/test_cryptodev_blockcipher.c
580:int dpaa2_pmd = rte_cryptodev_driver_id_get(
646:else if (driver_id == dpaa2_pmd)

Also, I submitted a patch to remove test_cryptodev_perf.c.
I will apply this patch first, so it is easier to integrate in the stable 
version,
but could you ack my patch if you are OK with it?

The sooner we remove it, the better, as we are investing a lot of time
in the crypto-perf app, and there is no need to maintain a duplicate, in my 
opinion.

Thanks,
Pablo



Re: [dpdk-dev] [PATCH v8 0/7] net/i40e: GTP-C and GTP-U enabling

2017-10-05 Thread Wu, Jingjing


> -Original Message-
> From: Xing, Beilei
> Sent: Thursday, October 5, 2017 4:15 PM
> To: Wu, Jingjing 
> Cc: Chilikin, Andrey ; dev@dpdk.org
> Subject: [PATCH v8 0/7] net/i40e: GTP-C and GTP-U enabling
> 
> This patch set enables RSS/FDIR/cloud filter for GPT-C and GTP-U.
> 
> v8 changes:
>  - Remove 'enable RSS for new pctype' as it can be set with the
>configuration in Kirill's patch.
>  - Resolve conflicts.
> 
> v7 changes:
>  - Distinguish GTP-C request and response message in mbuf description.
>  - Clarify GTP-C response message is not supported.
>  - Version_type 0x30 is invalid for GTP-C, replace with 0x32.
>  - Refine metadata parsing function.
>  - Rework for checking fdir programming status.
> 
> v6 changes:
>  - Reword description of GTP item and GTP structure, mainly support
>GTPv1, not include GTPv0 and GTPv2.
> 
> v5 changes:
>  - Fix code style.
>  - Reword commit log.
> 
> v4 changes:
>  - Refine fdir related code.
>  - Rework profile metadata parsing function.
>  - Fix code style.
> 
> v3 changes:
>  - Rework implementation to support the new profile.
>  - Add GTPC and GTPU tunnel type in software packet type parser.
>  - Update ptype info when loading profile.
>  - Fix bug of updating pctype info.
> 
> 
> v2 changes:
>  - Enable RSS/FDIR/cloud filter dynamically by checking profile
>  - Add GTPC and GTPU items to distinguish rule for GTP-C or GTP-U
>  - Rework FDIR/cloud filter enabling function
> 
> Beilei Xing (7):
>   mbuf: support GTP in software packet type parser
>   net/i40e: update ptype and pctype info
>   ethdev: add GTP items to support flow API
>   net/i40e: finish integration FDIR with generic flow API
>   net/i40e: add FDIR support for GTP-C and GTP-U
>   net/i40e: add cloud filter parsing function for GTP
>   net/i40e: enable cloud filter for GTP-C and GTP-U
> 
>  app/test-pmd/cmdline_flow.c |  40 ++
>  app/test-pmd/config.c   |   3 +
>  doc/guides/prog_guide/rte_flow.rst  |  17 +
>  doc/guides/testpmd_app_ug/testpmd_funcs.rst |   4 +
>  drivers/net/i40e/i40e_ethdev.c  | 505 +++-
>  drivers/net/i40e/i40e_ethdev.h  | 156 +++-
>  drivers/net/i40e/i40e_fdir.c| 585 
> +++-
>  drivers/net/i40e/i40e_flow.c| 503 
>  drivers/net/i40e/rte_pmd_i40e.c |   6 +-
>  lib/librte_ether/rte_flow.h |  52 +++
>  lib/librte_mbuf/rte_mbuf_ptype.c|   2 +
>  lib/librte_mbuf/rte_mbuf_ptype.h|  32 ++
>  12 files changed, 1774 insertions(+), 131 deletions(-)
> 
Acked-by: Jingjing Wu 


Thanks
Jingjing


Re: [dpdk-dev] [PATCH] pci/uio: enable prefetchable resources mapping

2017-10-05 Thread Bruce Richardson
On Thu, Oct 05, 2017 at 01:06:41AM +0100, Ferruh Yigit wrote:
> On 6/3/2017 11:57 PM, Changpeng Liu wrote:
> > For PCI prefetchable resources, Linux will create a
> > write combined file as well, the library will try
> > to map resourceX_wc file first, if the file does
> > not exist, then it will map resourceX as usual.
> 
> Hi Changpeng,
> 
> Code part looks OK, but can you please describe more why we should try
> write combined resource file first, what is the benefit of using it _wc
> file?
> 
> Thanks,
> ferruh
> 
Also, if we use a write combining resource file, I believe we will use
correct ordering of instructions within our drivers. Does applying this
patch not also mean that we would need memory barriers inside all the
drivers, so as to ensure that we don't have a queue doorbell write
reordered with the descriptor writes? I don't think it's safe to apply
this change on it's own, without driver changes, since all PMDs assume
strong ordering on IA.

Regards,
/Bruce

> 
> > 
> > Signed-off-by: Changpeng Liu 
> > ---
> >  lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 19 ++-
> >  1 file changed, 14 insertions(+), 5 deletions(-)
> > 
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c 
> > b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > index fa10329..d9fc20a 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > @@ -321,7 +321,7 @@
> >  
> > /* update devname for mmap  */
> > snprintf(devname, sizeof(devname),
> > -   "%s/" PCI_PRI_FMT "/resource%d",
> > +   "%s/" PCI_PRI_FMT "/resource%d_wc",
> > pci_get_sysfs_path(),
> > loc->domain, loc->bus, loc->devid,
> > loc->function, res_idx);
> > @@ -335,13 +335,22 @@
> > }
> >  
> > /*
> > -* open resource file, to mmap it
> > +* open prefetchable resource file first, try to mmap it
> >  */
> > fd = open(devname, O_RDWR);
> > if (fd < 0) {
> > -   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> > -   devname, strerror(errno));
> > -   goto error;
> > +   snprintf(devname, sizeof(devname),
> > +   "%s/" PCI_PRI_FMT "/resource%d",
> > +   pci_get_sysfs_path(),
> > +   loc->domain, loc->bus, loc->devid,
> > +   loc->function, res_idx);
> > +   /* then try to map resource file */
> > +   fd = open(devname, O_RDWR);
> > +   if (fd < 0) {
> > +   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> > +   devname, strerror(errno));
> > +   goto error;
> > +   }
> > }
> >  
> > /* try mapping somewhere close to the end of hugepages */
> > 
> 


Re: [dpdk-dev] [PATCH v6 4/8] ethdev: add GTP items to support flow API

2017-10-05 Thread Adrien Mazarguil
On Thu, Oct 05, 2017 at 08:06:38AM +, Wu, Jingjing wrote:
> 
> 
> > -Original Message-
> > From: Sean Harte [mailto:sea...@gmail.com]
> > Sent: Tuesday, October 3, 2017 4:57 PM
> > To: Adrien Mazarguil 
> > Cc: Xing, Beilei ; Wu, Jingjing 
> > ; Chilikin,
> > Andrey ; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v6 4/8] ethdev: add GTP items to support 
> > flow API
> > 
> > On 2 October 2017 at 13:27, Adrien Mazarguil  
> > wrote:
> > > On Fri, Sep 29, 2017 at 10:29:55AM +0100, Sean Harte wrote:
> > >> On 29 September 2017 at 09:54, Xing, Beilei  
> > >> wrote:
> > > 
> > >> >> >  /**
> > >> >> > + * RTE_FLOW_ITEM_TYPE_GTP.
> > >> >> > + *
> > >> >> > + * Matches a GTPv1 header.
> > >> >> > + */
> > >> >> > +struct rte_flow_item_gtp {
> > >> >> > +   /**
> > >> >> > +* Version (3b), protocol type (1b), reserved (1b),
> > >> >> > +* Extension header flag (1b),
> > >> >> > +* Sequence number flag (1b),
> > >> >> > +* N-PDU number flag (1b).
> > >> >> > +*/
> > >> >> > +   uint8_t v_pt_rsv_flags;
> > >> >> > +   uint8_t msg_type; /**< Message type. */
> > >> >> > +   rte_be16_t msg_len; /**< Message length. */
> > >> >> > +   rte_be32_t teid; /**< Tunnel endpoint identifier. */ };
> > >> >>
> > >> >> In future, you might add support for GTPv2 (which is used since LTE).
> > >> >> Maybe this structure should have v1 in its name to avoid confusion?
> > >> >
> > >> > I considered it before. But I think we can modify it when we support 
> > >> > GTPv2 in future,
> > and keep concise 'GTP' currently:)  since I have described it matches v1 
> > header.
> > >> >
> > >>
> > >> You could rename v_pt_rsv_flags to version_flags to avoid some future
> > >> code changes to support GTPv2. There's still the issue that not all
> > >> GTPv2 messages have a TEID though.
> > >
> > > Although they have the same size, the header of these two protocols
> > > obviously differs. My suggestion would be to go with a separate GTPv2
> > > pattern item using its own dedicated structure instead.
> > >
> > > --
> > > Adrien Mazarguil
> > > 6WIND
> > 
> > The 1st four bytes are the same (flags in first byte have different
> > meanings, but the bits indicating the version are in the same
> > location). After that, different fields in each version are optional,
> > and the headers have variable size. A single structure could be used
> > if the first field is renamed to something like "version_flags", and
> > then check that the teid field in item->mask is not set if
> > ((version_flags >> 5 == 2) && ((version_flags >> 4) & 1) == 1). If
> > there's going to be two structures, it would be good to put v1 and v2
> > in the names, in my opinion.
> 
> I think the name GTP is OK for now. Due to v1 and v2 are different, why not 
> rename them
> when the v2 supporting are introduced?

In any case I'd rather avoid renaming and modifying existing items and
structure contents once part of the API to avoid API/ABI breakage that
require deprecation notices, user applications updates and so on; rte_flow
has been created as a kind of append-only API for this reason (of course
there are exceptions, such as a bad design choice for the VLAN item I intend
to fix at some point).

I'm fine with the name "GTP" as defined now and documented as matching
GTPv1. We can add "GTPv2"-themed definitions later when some implementation
provides the ability to match this version. If you want to append the "v1"
suffix right now to be more explicit, I'm also fine with that. Your call.

-- 
Adrien Mazarguil
6WIND


Re: [dpdk-dev] [PATCH v5 1/9] net/i40e: add API to convert VF MAC to VF id

2017-10-05 Thread Hunt, David

Hi Santosh,

On 4/10/2017 4:41 PM, santosh wrote:

Hi David,


On Wednesday 04 October 2017 08:55 PM, David Hunt wrote:

From: "Sexton, Rory" 

Need a way to convert a vf id to a pf id on the host so as to query the pf
for relevant statistics which are used for the frequency changes in the
vm_power_manager app. Used when profiles are passed down from the guest
to the host, allowing the host to map the vfs to pfs.

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---

I see that you just now sent out v5;)
But I guess v4 comment on this patch [1]
is still applicable (imo).
Thanks.

[1] http://dpdk.org/dev/patchwork/patch/29577/


The v5 went out just as you were commenting on v4. :)

I agree that your comment above needs addressing, I'll do that in v6 today.

Regards.
Dave.



Re: [dpdk-dev] [PATCH] pci/uio: enable prefetchable resources mapping

2017-10-05 Thread Bruce Richardson
On Thu, Oct 05, 2017 at 09:28:34AM +0100, Bruce Richardson wrote:
> On Thu, Oct 05, 2017 at 01:06:41AM +0100, Ferruh Yigit wrote:
> > On 6/3/2017 11:57 PM, Changpeng Liu wrote:
> > > For PCI prefetchable resources, Linux will create a
> > > write combined file as well, the library will try
> > > to map resourceX_wc file first, if the file does
> > > not exist, then it will map resourceX as usual.
> > 
> > Hi Changpeng,
> > 
> > Code part looks OK, but can you please describe more why we should try
> > write combined resource file first, what is the benefit of using it _wc
> > file?
> > 
> > Thanks,
> > ferruh
> > 
> Also, if we use a write combining resource file, I believe we will use
s/will use/will lose/

> correct ordering of instructions within our drivers. Does applying this
> patch not also mean that we would need memory barriers inside all the
> drivers, so as to ensure that we don't have a queue doorbell write
> reordered with the descriptor writes? I don't think it's safe to apply
> this change on it's own, without driver changes, since all PMDs assume
> strong ordering on IA.
> 
> Regards,
> /Bruce
> 
> > 
> > > 
> > > Signed-off-by: Changpeng Liu 
> > > ---
> > >  lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 19 ++-
> > >  1 file changed, 14 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c 
> > > b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > > index fa10329..d9fc20a 100644
> > > --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > > @@ -321,7 +321,7 @@
> > >  
> > >   /* update devname for mmap  */
> > >   snprintf(devname, sizeof(devname),
> > > - "%s/" PCI_PRI_FMT "/resource%d",
> > > + "%s/" PCI_PRI_FMT "/resource%d_wc",
> > >   pci_get_sysfs_path(),
> > >   loc->domain, loc->bus, loc->devid,
> > >   loc->function, res_idx);
> > > @@ -335,13 +335,22 @@
> > >   }
> > >  
> > >   /*
> > > -  * open resource file, to mmap it
> > > +  * open prefetchable resource file first, try to mmap it
> > >*/
> > >   fd = open(devname, O_RDWR);
> > >   if (fd < 0) {
> > > - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> > > - devname, strerror(errno));
> > > - goto error;
> > > + snprintf(devname, sizeof(devname),
> > > + "%s/" PCI_PRI_FMT "/resource%d",
> > > + pci_get_sysfs_path(),
> > > + loc->domain, loc->bus, loc->devid,
> > > + loc->function, res_idx);
> > > + /* then try to map resource file */
> > > + fd = open(devname, O_RDWR);
> > > + if (fd < 0) {
> > > + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
> > > + devname, strerror(errno));
> > > + goto error;
> > > + }
> > >   }
> > >  
> > >   /* try mapping somewhere close to the end of hugepages */
> > > 
> > 


Re: [dpdk-dev] [PATCH v2] examples/l3fwd: pass flow arguments when start app

2017-10-05 Thread Wu, Jingjing


> -Original Message-
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Monday, October 2, 2017 1:24 AM
> To: Li, Xiaoyun 
> Cc: Wu, Jingjing ; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2] examples/l3fwd: pass flow arguments when 
> start app
> 
> On Sat, 30 Sep 2017 09:59:08 +0800
> Xiaoyun Li  wrote:
> 
> > To make the performance can be tuning on different NICs or platforms. We
> > need to make the number of descriptors and Rx/TX threshold as arguments
> > when starting l3fwd application.
> >
> > Signed-off-by: Xiaoyun Li 
> 
> Not sure about this. The point of l3fwd is to make it as simple
> an application as possible to help users.
> 
> Given that drivers can now supply default values for thresholds, I think
> the l3fwd sample should get rid of all the special descriptor values it
> is setting. Then if the values are not right for best performance that should
> be pushed back to the driver writer to fix.

But now what the driver uses are the arguments passed from the l3fwd application,
such as RTE_TEST_RX_DESC_DEFAULT. About the threshold, I guess the driver already
uses a default value. For the number of descriptors, any ideas? Should the 
driver
provide a suggested one?




[dpdk-dev] [PATCH v3 00/19] Vhost-user: Implement device IOTLB support

2017-10-05 Thread Maxime Coquelin
This v3 lists the feature in the release note, and fixes the bug in
is_vring_iotlb_update() reported by Yuanhan.

The purpose of this series is to add support for
VIRTIO_F_IOMMU_PLATFORM feature, by implementing device IOTLB in the
vhost-user backend. It improves the guest safety by enabling the
possibility to isolate the Virtio device.

It makes possible to use Virtio PMD in guest with using VFIO driver
without enable_unsafe_noiommu_mode parameter set, so that the DPDK
application on guest can only access memory its has been allowed to,
and preventing malicious/buggy DPDK application in guest to make
vhost-user backend write random guest memory. Note that Virtio-net
Kernel driver also support IOMMU.

The series depends on Qemu's "vhost-user: Specify and implement
device IOTLB support" [0], available upstream and which will be part
of Qemu v2.10 release.

Performance-wise, even if this RFC has still room for optimizations,
no performance degradation is noticed with static mappings (i.e. DPDK
on guest) with PVP benchmark:
Traffic Generator: Moongen (lua-trafficgen)
Acceptable Loss: 0.005%
Validation run time: 1 min
Guest DPDK version/commit: v17.05
QEMU version/commit: master (6db174aed1fd)
Virtio features: default
CPU: Intel(R) Xeon(R) CPU E5-2667 v4 @ 3.20GHz
NIC: 2 x X710
Page size: 1G host/1G guest
Results (bidirectional, total of the two flows):
 - base: 18.8Mpps
 - base + IOTLB series, IOMMU OFF: 18.8Mpps
 - base + IOTLB series, IOMMU ON: 18.8Mpps (14.5Mpps w/o PATCH 21/21)

This is explained because IOTLB misses, which are very costly, only
happen at startup time. Indeed, once used, the buffers are not
invalidated, so if the IOTLB cache is large enough, there will be only
cache hits. Also, the use of 1G huge pages improves the IOTLB cache
searching time by reducing the number of entries.

With 2M hugepages, a performance degradation is seen with IOMMU on:
Traffic Generator: Moongen (lua-trafficgen)
Acceptable Loss: 0.005%
Validation run time: 1 min
Guest DPDK version/commit: v17.05
QEMU version/commit: master (6db174aed1fd)
Virtio features: default
CPU: Intel(R) Xeon(R) CPU E5-2667 v4 @ 3.20GHz
NIC: 2 x X710
Page size: 2M host/2M guest
Results (bidirectional, total of the two flows):
 - base: 18.8Mpps
 - base + IOTLB series, IOMMU OFF: 18.8Mpps
 - base + IOTLB series, IOMMU ON: 13.5Mpps (12.4Mpps wo PATCH 21/21)

A possible improvement would be to merge contiguous IOTLB entries sharing
the same permissions. A very rough patch implementing this idea fixes
the performance degradation (18.8Mpps), but the required work to clean
it would delay this series after v17.11.

With dynamic mappings (i.e. Virtio-net kernel driver), this is another
story. The performance is so poor it makes it almost unusable. Indeed,
since the Kernel driver unmaps the buffers as soon as they are handled,
almost all descriptors buffers addresses translations result in an IOTLB
miss. There is not much that can be done on DPDK side, except maybe
batching IOTLB miss requests no to break bursts, but it would require
a big rework. In Qemu, we may consider enabling IOMMU MAP notifications,
so that DPDK receives the IOTLB updates without having to send IOTLB miss
request.

Regarding the design choices:
 - I initially intended to use userspace RCU library[1] for the cache
implementation, but it would have added an external dependency, and the
lib is not available in all distros. Qemu for example got rid of this
dependency by copying some of the userspace RCU lib parts into Qemu tree,
but this is not possible with DPDK due to licensing issues (RCU lib is
LGPL v2). Thanks to Jason advice, I implemented the cache using rd/wr
locks.
 - I initially implemented a per-device IOTLB cache, but the concurrent
accesses on the IOTLB lock had huge impact on performance (~-40% in
bidirectionnal, expect even worse with multiqueue). I move to a per-
virtqueue IOTLB design, which prevents this concurrency.
 - The slave IOTLB miss request supports reply-ack feature in spec, but
this version doesn't block or busy-wait for the corresponding update so
that other queues sharing the same lcore can be processed in the meantime.

For those who would like to test the series, I made it available on
gitlab[2] (vhost_user_iotlb_v1 tag). The guest kernel command line requires
the intel_iommu=on parameter, and the guest should be started with and
iommu device attached to the virtio-net device. For example:

./qemu-system-x86_64 \
  -enable-kvm -m 4096 -smp 2 \
  -M q35,kernel-irqchip=split \
  -cpu host \
  -device intel-iommu,device-iotlb=on,intremap \
  -device ioh3420,id=root.1,chassis=1 \
  -chardev socket,id=char0,path=/tmp/vhost-user1 \
  -netdev type=vhost-user,id=hn2,chardev=char0 \
  -device 
virtio-net-pci,netdev=hn2,id=v0,mq=off,mac=$MAC,bus=root.1,disa

[dpdk-dev] [PATCH v3 01/19] Revert "vhost: workaround MQ fails to startup"

2017-10-05 Thread Maxime Coquelin
This reverts commit 04d81227960b5c1cf2f11f492100979ead20c526.

As agreed when this workaround was introduced, it can be reverted
as Qemu v2.10 that fixes the issue is now out.

The reply-ack feature is required for vhost-user IOMMU support.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost_user.h | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 35ebd7190..2ba22dbb0 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -49,14 +49,10 @@
 #define VHOST_USER_PROTOCOL_F_REPLY_ACK3
 #define VHOST_USER_PROTOCOL_F_NET_MTU 4
 
-/*
- * disable REPLY_ACK feature to workaround the buggy QEMU implementation.
- * Proved buggy QEMU includes v2.7 - v2.9.
- */
 #define VHOST_USER_PROTOCOL_FEATURES   ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
 (1ULL << 
VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
 (1ULL << VHOST_USER_PROTOCOL_F_RARP) | 
\
-(0ULL << 
VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+(1ULL << 
VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
 (1ULL << 
VHOST_USER_PROTOCOL_F_NET_MTU))
 
 typedef enum VhostUserRequest {
-- 
2.13.6



[dpdk-dev] [PATCH v3 02/19] vhost: make error handling consistent in rx path

2017-10-05 Thread Maxime Coquelin
In the non-mergeable receive case, when copy_mbuf_to_desc()
call fails the packet is skipped, the corresponding used element
len field is set to vnet header size, and it continues with next
packet/desc. This could be a problem because it does not know why it
failed, and assumes the desc buffer is large enough.

In mergeable receive case, when copy_mbuf_to_desc_mergeable()
fails, packets burst is simply stopped.

This patch makes the non-mergeable error path to behave as the
mergeable one, as it seems the safest way. Also, doing this way
will simplify pending IOTLB miss requests handling.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index f8732dfec..59ff6c875 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -374,11 +374,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz);
if (unlikely(err)) {
-   used_idx = (start_idx + i) & (vq->size - 1);
-   vq->used->ring[used_idx].len = dev->vhost_hlen;
-   vhost_log_used_vring(dev, vq,
-   offsetof(struct vring_used, ring[used_idx]),
-   sizeof(vq->used->ring[used_idx]));
+   count = i;
+   break;
}
 
if (i + 1 < count)
-- 
2.13.6



[dpdk-dev] [PATCH v3 03/19] vhost: prepare send_vhost_message() to slave requests

2017-10-05 Thread Maxime Coquelin
send_vhost_message() is currently only used to send
replies, so it modifies message flags to prepare the
reply.

With the upcoming channel for backend-initiated requests,
this function can be used to send requests.

This patch introduces a new send_vhost_reply() that
does the message flags modifications, and makes
send_vhost_message() generic.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost_user.c | 27 ---
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index b62e3828b..a068d8651 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -919,8 +919,16 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
 static int
 send_vhost_message(int sockfd, struct VhostUserMsg *msg)
 {
-   int ret;
+   if (!msg)
+   return 0;
+
+   return send_fd_message(sockfd, (char *)msg,
+   VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+}
 
+static int
+send_vhost_reply(int sockfd, struct VhostUserMsg *msg)
+{
if (!msg)
return 0;
 
@@ -929,10 +937,7 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
msg->flags |= VHOST_USER_VERSION;
msg->flags |= VHOST_USER_REPLY_MASK;
 
-   ret = send_fd_message(sockfd, (char *)msg,
-   VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
-
-   return ret;
+   return send_vhost_message(sockfd, msg);
 }
 
 /*
@@ -1024,7 +1029,7 @@ vhost_user_msg_handler(int vid, int fd)
case VHOST_USER_GET_FEATURES:
msg.payload.u64 = vhost_user_get_features(dev);
msg.size = sizeof(msg.payload.u64);
-   send_vhost_message(fd, &msg);
+   send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_FEATURES:
vhost_user_set_features(dev, msg.payload.u64);
@@ -1033,7 +1038,7 @@ vhost_user_msg_handler(int vid, int fd)
case VHOST_USER_GET_PROTOCOL_FEATURES:
msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
msg.size = sizeof(msg.payload.u64);
-   send_vhost_message(fd, &msg);
+   send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_PROTOCOL_FEATURES:
vhost_user_set_protocol_features(dev, msg.payload.u64);
@@ -1055,7 +1060,7 @@ vhost_user_msg_handler(int vid, int fd)
 
/* it needs a reply */
msg.size = sizeof(msg.payload.u64);
-   send_vhost_message(fd, &msg);
+   send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_LOG_FD:
close(msg.fds[0]);
@@ -1075,7 +1080,7 @@ vhost_user_msg_handler(int vid, int fd)
case VHOST_USER_GET_VRING_BASE:
vhost_user_get_vring_base(dev, &msg);
msg.size = sizeof(msg.payload.state);
-   send_vhost_message(fd, &msg);
+   send_vhost_reply(fd, &msg);
break;
 
case VHOST_USER_SET_VRING_KICK:
@@ -1094,7 +1099,7 @@ vhost_user_msg_handler(int vid, int fd)
case VHOST_USER_GET_QUEUE_NUM:
msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
msg.size = sizeof(msg.payload.u64);
-   send_vhost_message(fd, &msg);
+   send_vhost_reply(fd, &msg);
break;
 
case VHOST_USER_SET_VRING_ENABLE:
@@ -1117,7 +1122,7 @@ vhost_user_msg_handler(int vid, int fd)
if (msg.flags & VHOST_USER_NEED_REPLY) {
msg.payload.u64 = !!ret;
msg.size = sizeof(msg.payload.u64);
-   send_vhost_message(fd, &msg);
+   send_vhost_reply(fd, &msg);
}
 
if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) {
-- 
2.13.6



Re: [dpdk-dev] [PATCH v4 2/9] lib/librte_power: add extra msg type for policies

2017-10-05 Thread Hunt, David


Hi Santosh,

On 4/10/2017 4:36 PM, santosh wrote:

Hi David,


On Wednesday 04 October 2017 02:45 PM, David Hunt wrote:

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---

my 2cent:
General comment on implementation approach:
IMO, we should avoid PMD details in common lib area.
example: file channel_commons.h has ifdef clutter referencing
i40e pmds all over.

Perhaps we should introduce opaque handle example void * or introduce pmd
specific callback/handle which points to PMD specific metadata in power library.

Example:
struct channel_packet {
   void *pmd_specific_metadata;
}

Or someway via callback (I'm not sure at the moment)
so that we could hide PMD details in common area.

Thanks.


I would agree that PMD specific details are good left to the PMDs, 
however I think that the initial
example should be OK as is, and as new PMDs are added, we can find 
commonality between them
which stays in the example, and any really specific stuff can be pushed 
back behind an opaque.


What about the v5 I submitted (without the #ifdef's)? Are you OK with 
that for this release, and we can

fine tune as other PMDS are added in future releases?

Regards,
Dave.




Re: [dpdk-dev] [PATCH v6 4/8] ethdev: add GTP items to support flow API

2017-10-05 Thread Wu, Jingjing


> -Original Message-
> From: Adrien Mazarguil [mailto:adrien.mazarg...@6wind.com]
> Sent: Thursday, October 5, 2017 4:30 PM
> To: Wu, Jingjing 
> Cc: Sean Harte ; Xing, Beilei ; 
> Chilikin,
> Andrey ; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v6 4/8] ethdev: add GTP items to support flow 
> API
> 
> On Thu, Oct 05, 2017 at 08:06:38AM +, Wu, Jingjing wrote:
> >
> >
> > > -Original Message-
> > > From: Sean Harte [mailto:sea...@gmail.com]
> > > Sent: Tuesday, October 3, 2017 4:57 PM
> > > To: Adrien Mazarguil 
> > > Cc: Xing, Beilei ; Wu, Jingjing 
> > > ;
> Chilikin,
> > > Andrey ; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v6 4/8] ethdev: add GTP items to support 
> > > flow API
> > >
> > > On 2 October 2017 at 13:27, Adrien Mazarguil 
> wrote:
> > > > On Fri, Sep 29, 2017 at 10:29:55AM +0100, Sean Harte wrote:
> > > >> On 29 September 2017 at 09:54, Xing, Beilei  
> > > >> wrote:
> > > > 
> > > >> >> >  /**
> > > >> >> > + * RTE_FLOW_ITEM_TYPE_GTP.
> > > >> >> > + *
> > > >> >> > + * Matches a GTPv1 header.
> > > >> >> > + */
> > > >> >> > +struct rte_flow_item_gtp {
> > > >> >> > +   /**
> > > >> >> > +* Version (3b), protocol type (1b), reserved (1b),
> > > >> >> > +* Extension header flag (1b),
> > > >> >> > +* Sequence number flag (1b),
> > > >> >> > +* N-PDU number flag (1b).
> > > >> >> > +*/
> > > >> >> > +   uint8_t v_pt_rsv_flags;
> > > >> >> > +   uint8_t msg_type; /**< Message type. */
> > > >> >> > +   rte_be16_t msg_len; /**< Message length. */
> > > >> >> > +   rte_be32_t teid; /**< Tunnel endpoint identifier. */ };
> > > >> >>
> > > >> >> In future, you might add support for GTPv2 (which is used since 
> > > >> >> LTE).
> > > >> >> Maybe this structure should have v1 in its name to avoid confusion?
> > > >> >
> > > >> > I considered it before. But I think we can modify it when we support 
> > > >> > GTPv2 in
> future,
> > > and keep concise 'GTP' currently:)  since I have described it matches v1 
> > > header.
> > > >> >
> > > >>
> > > >> You could rename v_pt_rsv_flags to version_flags to avoid some future
> > > >> code changes to support GTPv2. There's still the issue that not all
> > > >> GTPv2 messages have a TEID though.
> > > >
> > > > Although they have the same size, the header of these two protocols
> > > > obviously differs. My suggestion would be to go with a separate GTPv2
> > > > pattern item using its own dedicated structure instead.
> > > >
> > > > --
> > > > Adrien Mazarguil
> > > > 6WIND
> > >
> > > The 1st four bytes are the same (flags in first byte have different
> > > meanings, but the bits indicating the version are in the same
> > > location). After that, different fields in each version are optional,
> > > and the headers have variable size. A single structure could be used
> > > if the first field is renamed to something like "version_flags", and
> > > then check that the teid field in item->mask is not set if
> > > ((version_flags >> 5 == 2) && ((version_flags >> 4) & 1) == 1). If
> > > there's going to be two structures, it would be good to put v1 and v2
> > > in the names, in my opinion.
> >
> > I think the name GTP is OK for now. Due to v1 and v2 are different, why not 
> > rename
> them
> > when the v2 supporting are introduced?
> 
> In any case I'd rather avoid renaming and modifying existing items and
> structure contents once part of the API to avoid API/ABI breakage that
> require deprecation notices, user applications updates and so on; rte_flow
> has been created as a kind of append-only API for this reason (of course
> there are exceptions, such as a bad design choice for the VLAN item I intend
> to fix at some point).
> 
> I'm fine with the name "GTP" as defined now and documented as matching
> GTPv1. We can add "GTPv2"-themed definitions later when some implementation
> provides the ability to match this version. If you want to append the "v1"
> suffix right now to be more explicit, I'm also fine with that. Your call.
> 
Got your point, I'm also fine with the name now for GTPv1, and add "GTPv2" when
It is supported.

Thanks
Jingjing


[dpdk-dev] [PATCH v3 04/19] vhost: add support to slave requests channel

2017-10-05 Thread Maxime Coquelin
Currently, only QEMU sends requests, the backend sends
replies. In some cases, the backend may need to send
requests to QEMU, like IOTLB miss events when IOMMU is
supported.

This patch introduces a new channel for such requests.
QEMU sends a file descriptor of a new socket using
VHOST_USER_SET_SLAVE_REQ_FD.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c  |  1 +
 lib/librte_vhost/vhost.h  |  2 ++
 lib/librte_vhost/vhost_user.c | 27 +++
 lib/librte_vhost/vhost_user.h | 10 +-
 4 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 474b6e493..2d30f14c4 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -207,6 +207,7 @@ vhost_new_device(void)
 
vhost_devices[i] = dev;
dev->vid = i;
+   dev->slave_req_fd = -1;
 
return i;
 }
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 74df74717..8405f879b 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -209,6 +209,8 @@ struct virtio_net {
uint32_tnr_guest_pages;
uint32_tmax_guest_pages;
struct guest_page   *guest_pages;
+
+   int slave_req_fd;
 } __rte_cache_aligned;
 
 
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index a068d8651..0ba66e193 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -76,6 +76,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_SET_VRING_ENABLE]  = "VHOST_USER_SET_VRING_ENABLE",
[VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",
[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
+   [VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 };
 
 static uint64_t
@@ -122,6 +123,11 @@ vhost_backend_cleanup(struct virtio_net *dev)
munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
dev->log_addr = 0;
}
+
+   if (dev->slave_req_fd >= 0) {
+   close(dev->slave_req_fd);
+   dev->slave_req_fd = -1;
+   }
 }
 
 /*
@@ -886,6 +892,23 @@ vhost_user_net_set_mtu(struct virtio_net *dev, struct 
VhostUserMsg *msg)
return 0;
 }
 
+static int
+vhost_user_set_req_fd(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+   int fd = msg->fds[0];
+
+   if (fd < 0) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Invalid file descriptor for slave channel 
(%d)\n",
+   fd);
+   return -1;
+   }
+
+   dev->slave_req_fd = fd;
+
+   return 0;
+}
+
 /* return bytes# of read on success or negative val on failure. */
 static int
 read_vhost_message(int sockfd, struct VhostUserMsg *msg)
@@ -1113,6 +1136,10 @@ vhost_user_msg_handler(int vid, int fd)
ret = vhost_user_net_set_mtu(dev, &msg);
break;
 
+   case VHOST_USER_SET_SLAVE_REQ_FD:
+   ret = vhost_user_set_req_fd(dev, &msg);
+   break;
+
default:
ret = -1;
break;
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 2ba22dbb0..98f6e6f37 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -48,12 +48,14 @@
 #define VHOST_USER_PROTOCOL_F_RARP 2
 #define VHOST_USER_PROTOCOL_F_REPLY_ACK3
 #define VHOST_USER_PROTOCOL_F_NET_MTU 4
+#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
 
 #define VHOST_USER_PROTOCOL_FEATURES   ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
 (1ULL << 
VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
 (1ULL << VHOST_USER_PROTOCOL_F_RARP) | 
\
 (1ULL << 
VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
-(1ULL << 
VHOST_USER_PROTOCOL_F_NET_MTU))
+(1ULL << 
VHOST_USER_PROTOCOL_F_NET_MTU) | \
+(1ULL << 
VHOST_USER_PROTOCOL_F_SLAVE_REQ))
 
 typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
@@ -77,9 +79,15 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
VHOST_USER_NET_SET_MTU = 20,
+   VHOST_USER_SET_SLAVE_REQ_FD = 21,
VHOST_USER_MAX
 } VhostUserRequest;
 
+typedef enum VhostUserSlaveRequest {
+   VHOST_USER_SLAVE_NONE = 0,
+   VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
 typedef struct VhostUserMemoryRegion {
uint64_t guest_phys_addr;
uint64_t memory_size;
-- 
2.13.6



[dpdk-dev] [PATCH v3 05/19] vhost: declare missing IOMMU-related definitions for old kernels

2017-10-05 Thread Maxime Coquelin
These defines and enums have been introduced in upstream kernel v4.8,
and backported to RHEL 7.4.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.h | 31 +++
 1 file changed, 31 insertions(+)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 8405f879b..94bee4c8d 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -145,6 +145,37 @@ struct vhost_virtqueue {
  #define VIRTIO_NET_F_MTU 3
 #endif
 
+/* Declare IOMMU related bits for older kernels */
+#ifndef VIRTIO_F_IOMMU_PLATFORM
+
+#define VIRTIO_F_IOMMU_PLATFORM 33
+
+struct vhost_iotlb_msg {
+   __u64 iova;
+   __u64 size;
+   __u64 uaddr;
+#define VHOST_ACCESS_RO  0x1
+#define VHOST_ACCESS_WO  0x2
+#define VHOST_ACCESS_RW  0x3
+   __u8 perm;
+#define VHOST_IOTLB_MISS   1
+#define VHOST_IOTLB_UPDATE 2
+#define VHOST_IOTLB_INVALIDATE 3
+#define VHOST_IOTLB_ACCESS_FAIL4
+   __u8 type;
+};
+
+#define VHOST_IOTLB_MSG 0x1
+
+struct vhost_msg {
+   int type;
+   union {
+   struct vhost_iotlb_msg iotlb;
+   __u8 padding[64];
+   };
+};
+#endif
+
 /*
  * Define virtio 1.0 for older kernels
  */
-- 
2.13.6



[dpdk-dev] [PATCH v3 06/19] vhost: add iotlb helper functions

2017-10-05 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/Makefile |   4 +-
 lib/librte_vhost/iotlb.c  | 259 ++
 lib/librte_vhost/iotlb.h  |  70 +
 lib/librte_vhost/vhost.c  |   1 +
 lib/librte_vhost/vhost.h  |   6 ++
 5 files changed, 338 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_vhost/iotlb.c
 create mode 100644 lib/librte_vhost/iotlb.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 4a116fe31..e1084aba5 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -47,8 +47,8 @@ LDLIBS += -lnuma
 endif
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \
-  virtio_net.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
+   vhost_user.c virtio_net.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
new file mode 100644
index 0..fcfdd25d7
--- /dev/null
+++ b/lib/librte_vhost/iotlb.c
@@ -0,0 +1,259 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2017 Red Hat, Inc.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include 
+#endif
+
+#include 
+
+#include "iotlb.h"
+#include "vhost.h"
+
+struct vhost_iotlb_entry {
+   TAILQ_ENTRY(vhost_iotlb_entry) next;
+
+   uint64_t iova;
+   uint64_t uaddr;
+   uint64_t size;
+   uint8_t perm;
+};
+
+#define IOTLB_CACHE_SIZE 1024
+
+static void
+vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
+{
+   struct vhost_iotlb_entry *node, *temp_node;
+
+   rte_rwlock_write_lock(&vq->iotlb_lock);
+
+   TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+   TAILQ_REMOVE(&vq->iotlb_list, node, next);
+   rte_mempool_put(vq->iotlb_pool, node);
+   }
+
+   vq->iotlb_cache_nr = 0;
+
+   rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+static void
+vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
+{
+   struct vhost_iotlb_entry *node, *temp_node;
+   int entry_idx;
+
+   rte_rwlock_write_lock(&vq->iotlb_lock);
+
+   entry_idx = rte_rand() % vq->iotlb_cache_nr;
+
+   TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+   if (!entry_idx) {
+   TAILQ_REMOVE(&vq->iotlb_list, node, next);
+   rte_mempool_put(vq->iotlb_pool, node);
+   vq->iotlb_cache_nr--;
+   break;
+   }
+   entry_idx--;
+   }
+
+   rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+void
+vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
+   uint64_t uaddr, uint64_t size, uint8_t perm)
+{
+   struct vhost_iotlb_entry *node, *new_node;
+   int ret;
+
+   ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
+   if (ret) {
+   RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, evict one 
entry\n");
+   vhost_user_iotlb_cache_random_evict(vq);
+   ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
+   if (ret) {
+   RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, 
failure\n");
+   return;
+  

[dpdk-dev] [PATCH v3 07/19] vhost: iotlb: add pending miss request list and helpers

2017-10-05 Thread Maxime Coquelin
In order to be able to handle other ports or queues while waiting
for an IOTLB miss reply, a pending list is created so that waiter
can return and restart later on with sending again a miss request.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/iotlb.c | 97 +++-
 lib/librte_vhost/iotlb.h |  6 +++
 lib/librte_vhost/vhost.h |  4 +-
 3 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
index fcfdd25d7..066c37a73 100644
--- a/lib/librte_vhost/iotlb.c
+++ b/lib/librte_vhost/iotlb.c
@@ -48,7 +48,94 @@ struct vhost_iotlb_entry {
uint8_t perm;
 };
 
-#define IOTLB_CACHE_SIZE 1024
+#define IOTLB_CACHE_SIZE 2048
+
+static void
+vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
+{
+   struct vhost_iotlb_entry *node, *temp_node;
+
+   rte_rwlock_write_lock(&vq->iotlb_pending_lock);
+
+   TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
+   TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
+   rte_mempool_put(vq->iotlb_pool, node);
+   }
+
+   rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
+}
+
+bool
+vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
+   uint8_t perm)
+{
+   struct vhost_iotlb_entry *node;
+   bool found = false;
+
+   rte_rwlock_read_lock(&vq->iotlb_pending_lock);
+
+   TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
+   if ((node->iova == iova) && (node->perm == perm)) {
+   found = true;
+   break;
+   }
+   }
+
+   rte_rwlock_read_unlock(&vq->iotlb_pending_lock);
+
+   return found;
+}
+
+void
+vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq,
+   uint64_t iova, uint8_t perm)
+{
+   struct vhost_iotlb_entry *node;
+   int ret;
+
+   ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
+   if (ret) {
+   RTE_LOG(INFO, VHOST_CONFIG,
+   "IOTLB pool empty, clear pending misses\n");
+   vhost_user_iotlb_pending_remove_all(vq);
+   ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
+   if (ret) {
+   RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, 
failure\n");
+   return;
+   }
+   }
+
+   node->iova = iova;
+   node->perm = perm;
+
+   rte_rwlock_write_lock(&vq->iotlb_pending_lock);
+
+   TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);
+
+   rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
+}
+
+static void
+vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
+   uint64_t iova, uint64_t size, uint8_t perm)
+{
+   struct vhost_iotlb_entry *node, *temp_node;
+
+   rte_rwlock_write_lock(&vq->iotlb_pending_lock);
+
+   TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
+   if (node->iova < iova)
+   continue;
+   if (node->iova >= iova + size)
+   continue;
+   if ((node->perm & perm) != node->perm)
+   continue;
+   TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
+   rte_mempool_put(vq->iotlb_pool, node);
+   }
+
+   rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
+}
 
 static void
 vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
@@ -134,7 +221,10 @@ vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, 
uint64_t iova,
vq->iotlb_cache_nr++;
 
 unlock:
+   vhost_user_iotlb_pending_remove(vq, iova, size, perm);
+
rte_rwlock_write_unlock(&vq->iotlb_lock);
+
 }
 
 void
@@ -215,9 +305,10 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
if (vq->iotlb_pool) {
/*
 * The cache has already been initialized,
-* just drop all entries
+* just drop all cached and pending entries.
 */
vhost_user_iotlb_cache_remove_all(vq);
+   vhost_user_iotlb_pending_remove_all(vq);
return 0;
}
 
@@ -228,8 +319,10 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
socket = 0;
 
rte_rwlock_init(&vq->iotlb_lock);
+   rte_rwlock_init(&vq->iotlb_pending_lock);
 
TAILQ_INIT(&vq->iotlb_list);
+   TAILQ_INIT(&vq->iotlb_pending_list);
 
snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
dev->vid, vq_index);
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
index 27b2d6b30..f1a050e44 100644
--- a/lib/librte_vhost/iotlb.h
+++ b/lib/librte_vhost/iotlb.h
@@ -32,6 +32,8 @@
 #ifndef _VHOST_IOTLB_H_
 #define _VHOST_IOTLB_H_
 
+#include 
+
 #include "vhost.h"
 
 static __rte_always_inline voi

Re: [dpdk-dev] [PATCH v5 2/9] lib/librte_power: add extra msg type for policies

2017-10-05 Thread Hunt, David

Hi Santosh,


On 4/10/2017 4:47 PM, santosh wrote:

Hi David,


On Wednesday 04 October 2017 08:55 PM, David Hunt wrote:

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---

Glad that ifdef clutter removed.
Few nits though..


  lib/librte_power/channel_commands.h | 42 +
  1 file changed, 42 insertions(+)

diff --git a/lib/librte_power/channel_commands.h 
b/lib/librte_power/channel_commands.h
index 484085b..020d9fe 100644
--- a/lib/librte_power/channel_commands.h
+++ b/lib/librte_power/channel_commands.h
@@ -46,6 +46,7 @@ extern "C" {
  /* Valid Commands */
  #define CPU_POWER   1
  #define CPU_POWER_CONNECT   2
+#define PKT_POLICY  3
  
  /* CPU Power Command Scaling */

  #define CPU_POWER_SCALE_UP  1
@@ -54,11 +55,52 @@ extern "C" {
  #define CPU_POWER_SCALE_MIN 4
  #define CPU_POWER_ENABLE_TURBO  5
  #define CPU_POWER_DISABLE_TURBO 6
+#define HOURS 24
+
+#define MAX_VFS 10
+
+#define MAX_VCPU_PER_VM 8
+
+typedef enum {false, true} bool;
+

do we really need typedef for bool; can't we simply
use bool data-type?


Sure, will fix.


+struct t_boost_status {
+   bool tbEnabled;
+};
+
+struct timer_profile {
+   int busy_hours[HOURS];
+   int quiet_hours[HOURS];
+   int hours_to_use_traffic_profile[HOURS];
+};
+
+enum workload {HIGH, MEDIUM, LOW};
+enum policy_to_use {
+   TRAFFIC,
+   TIME,
+   WORKLOAD
+};
+
+struct traffic {
+   uint32_t min_packet_thresh;
+   uint32_t avg_max_packet_thresh;
+   uint32_t max_max_packet_thresh;
+};
  
  struct channel_packet {

uint64_t resource_id; /**< core_num, device */
uint32_t unit;/**< scale down/up/min/max */
uint32_t command; /**< Power, IO, etc */
+   char vm_name[32];
+

How about const char * Or in case not possible then #define RTE_xx 32 Or
use existing RTE_ for same purpose or some micro local to power lib.


I'll change to use an existing RTE_xx.

--snip--

Thanks,
Dave.


[dpdk-dev] [PATCH v3 08/19] vhost-user: add support to IOTLB miss slave requests

2017-10-05 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost_user.c | 25 +
 lib/librte_vhost/vhost_user.h |  3 +++
 2 files changed, 28 insertions(+)

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 0ba66e193..3df5c5755 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1168,3 +1168,28 @@ vhost_user_msg_handler(int vid, int fd)
 
return 0;
 }
+
+int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
+{
+   int ret;
+   struct VhostUserMsg msg = {
+   .request = VHOST_USER_SLAVE_IOTLB_MSG,
+   .flags = VHOST_USER_VERSION,
+   .size = sizeof(msg.payload.iotlb),
+   .payload.iotlb = {
+   .iova = iova,
+   .perm = perm,
+   .type = VHOST_IOTLB_MISS,
+   },
+   };
+
+   ret = send_vhost_message(dev->slave_req_fd, &msg);
+   if (ret < 0) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Failed to send IOTLB miss message (%d)\n",
+   ret);
+   return ret;
+   }
+
+   return 0;
+}
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 98f6e6f37..0b2aff14e 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -85,6 +85,7 @@ typedef enum VhostUserRequest {
 
 typedef enum VhostUserSlaveRequest {
VHOST_USER_SLAVE_NONE = 0,
+   VHOST_USER_SLAVE_IOTLB_MSG = 1,
VHOST_USER_SLAVE_MAX
 } VhostUserSlaveRequest;
 
@@ -122,6 +123,7 @@ typedef struct VhostUserMsg {
struct vhost_vring_addr addr;
VhostUserMemory memory;
VhostUserLoglog;
+   struct vhost_iotlb_msg iotlb;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
 } __attribute((packed)) VhostUserMsg;
@@ -134,6 +136,7 @@ typedef struct VhostUserMsg {
 
 /* vhost_user.c */
 int vhost_user_msg_handler(int vid, int fd);
+int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
 
 /* socket.c */
 int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
-- 
2.13.6



[dpdk-dev] [PATCH v3 09/19] vhost: initialize vrings IOTLB caches

2017-10-05 Thread Maxime Coquelin
The per-virtqueue IOTLB cache init is done at virtqueue
init time. init_vring_queue() now takes vring id as parameter,
so that the IOTLB cache mempool name can be generated.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c | 32 +++-
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index edcf1e0c5..2493a7992 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -48,6 +48,7 @@
 #include 
 #include 
 
+#include "iotlb.h"
 #include "vhost.h"
 
 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
@@ -111,13 +112,25 @@ free_device(struct virtio_net *dev)
 }
 
 static void
-init_vring_queue(struct vhost_virtqueue *vq)
+init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 {
+   struct vhost_virtqueue *vq;
+
+   if (vring_idx >= VHOST_MAX_VRING) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Failed not init vring, out of bound (%d)\n",
+   vring_idx);
+   return;
+   }
+
+   vq = dev->virtqueue[vring_idx];
+
memset(vq, 0, sizeof(struct vhost_virtqueue));
 
vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
 
+   vhost_user_iotlb_init(dev, vring_idx);
/* Backends are set to -1 indicating an inactive device. */
vq->backend = -1;
 
@@ -131,12 +144,21 @@ init_vring_queue(struct vhost_virtqueue *vq)
 }
 
 static void
-reset_vring_queue(struct vhost_virtqueue *vq)
+reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 {
+   struct vhost_virtqueue *vq;
int callfd;
 
+   if (vring_idx >= VHOST_MAX_VRING) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Failed not init vring, out of bound (%d)\n",
+   vring_idx);
+   return;
+   }
+
+   vq = dev->virtqueue[vring_idx];
callfd = vq->callfd;
-   init_vring_queue(vq);
+   init_vring_queue(dev, vring_idx);
vq->callfd = callfd;
 }
 
@@ -153,7 +175,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t 
vring_idx)
}
 
dev->virtqueue[vring_idx] = vq;
-   init_vring_queue(vq);
+   init_vring_queue(dev, vring_idx);
 
dev->nr_vring += 1;
 
@@ -175,7 +197,7 @@ reset_device(struct virtio_net *dev)
dev->flags = 0;
 
for (i = 0; i < dev->nr_vring; i++)
-   reset_vring_queue(dev->virtqueue[i]);
+   reset_vring_queue(dev, i);
 }
 
 /*
-- 
2.13.6



[dpdk-dev] [PATCH v3 10/19] vhost-user: handle IOTLB update and invalidate requests

2017-10-05 Thread Maxime Coquelin
Vhost-user device IOTLB protocol extension introduces
VHOST_USER_IOTLB message type. The associated payload is the
vhost_iotlb_msg struct defined in Kernel, which in this was can
be either an IOTLB update or invalidate message.

On IOTLB update, the virtqueues get notified of a new entry.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost_user.c | 43 +++
 lib/librte_vhost/vhost_user.h |  1 +
 2 files changed, 44 insertions(+)

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 3df5c5755..0f23ea388 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -48,6 +48,7 @@
 #include 
 #include 
 
+#include "iotlb.h"
 #include "vhost.h"
 #include "vhost_user.h"
 
@@ -77,6 +78,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",
[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
+   [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
 };
 
 static uint64_t
@@ -909,6 +911,43 @@ vhost_user_set_req_fd(struct virtio_net *dev, struct 
VhostUserMsg *msg)
return 0;
 }
 
+static int
+vhost_user_iotlb_msg(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+   struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
+   uint16_t i;
+   uint64_t vva;
+
+   switch (imsg->type) {
+   case VHOST_IOTLB_UPDATE:
+   vva = qva_to_vva(dev, imsg->uaddr);
+   if (!vva)
+   return -1;
+
+   for (i = 0; i < dev->nr_vring; i++) {
+   struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+   vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
+   imsg->size, imsg->perm);
+   }
+   break;
+   case VHOST_IOTLB_INVALIDATE:
+   for (i = 0; i < dev->nr_vring; i++) {
+   struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+   vhost_user_iotlb_cache_remove(vq, imsg->iova,
+   imsg->size);
+   }
+   break;
+   default:
+   RTE_LOG(ERR, VHOST_CONFIG, "Invalid IOTLB message type (%d)\n",
+   imsg->type);
+   return -1;
+   }
+
+   return 0;
+}
+
 /* return bytes# of read on success or negative val on failure. */
 static int
 read_vhost_message(int sockfd, struct VhostUserMsg *msg)
@@ -1140,6 +1179,10 @@ vhost_user_msg_handler(int vid, int fd)
ret = vhost_user_set_req_fd(dev, &msg);
break;
 
+   case VHOST_USER_IOTLB_MSG:
+   ret = vhost_user_iotlb_msg(dev, &msg);
+   break;
+
default:
ret = -1;
break;
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 0b2aff14e..46c6ff956 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -80,6 +80,7 @@ typedef enum VhostUserRequest {
VHOST_USER_SEND_RARP = 19,
VHOST_USER_NET_SET_MTU = 20,
VHOST_USER_SET_SLAVE_REQ_FD = 21,
+   VHOST_USER_IOTLB_MSG = 22,
VHOST_USER_MAX
 } VhostUserRequest;
 
-- 
2.13.6



[dpdk-dev] [PATCH v3 12/19] vhost: use the guest IOVA to host VA helper

2017-10-05 Thread Maxime Coquelin
Replace rte_vhost_gpa_to_vva() calls with vhost_iova_to_vva(), which
requires to also pass the mapped len and the access permissions needed.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 71 +++
 1 file changed, 58 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 59ff6c875..cdfb6f957 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 
+#include "iotlb.h"
 #include "vhost.h"
 
 #define MAX_PKT_BURST 32
@@ -211,7 +212,8 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
int error = 0;
 
desc = &descs[desc_idx];
-   desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+   desc->len, VHOST_ACCESS_RW);
/*
 * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
 * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
@@ -255,7 +257,9 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
}
 
desc = &descs[desc->next];
-   desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+   desc->len,
+   VHOST_ACCESS_RW);
if (unlikely(!desc_addr)) {
error = -1;
goto out;
@@ -352,14 +356,20 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
}
 
rte_prefetch0(&vq->desc[desc_indexes[0]]);
+
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_rd_lock(vq);
+
for (i = 0; i < count; i++) {
uint16_t desc_idx = desc_indexes[i];
int err;
 
if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
descs = (struct vring_desc *)(uintptr_t)
-   rte_vhost_gpa_to_vva(dev->mem,
-   vq->desc[desc_idx].addr);
+   vhost_iova_to_vva(dev,
+   vq, vq->desc[desc_idx].addr,
+   vq->desc[desc_idx].len,
+   VHOST_ACCESS_RO);
if (unlikely(!descs)) {
count = i;
break;
@@ -384,6 +394,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
do_data_copy_enqueue(dev, vq);
 
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_rd_unlock(vq);
+
rte_smp_wmb();
 
*(volatile uint16_t *)&vq->used->idx += count;
@@ -417,7 +430,9 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue 
*vq,
 
if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
descs = (struct vring_desc *)(uintptr_t)
-   rte_vhost_gpa_to_vva(dev->mem, vq->desc[idx].addr);
+   vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
+   vq->desc[idx].len,
+   VHOST_ACCESS_RO);
if (unlikely(!descs))
return -1;
 
@@ -512,7 +527,9 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
goto out;
}
 
-   desc_addr = rte_vhost_gpa_to_vva(dev->mem, buf_vec[vec_idx].buf_addr);
+   desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr,
+   buf_vec[vec_idx].buf_len,
+   VHOST_ACCESS_RW);
if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) {
error = -1;
goto out;
@@ -535,8 +552,11 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
/* done with current desc buf, get the next one */
if (desc_avail == 0) {
vec_idx++;
-   desc_addr = rte_vhost_gpa_to_vva(dev->mem,
-   buf_vec[vec_idx].buf_addr);
+   desc_addr =
+   vhost_iova_to_vva(dev, vq,
+   buf_vec[vec_idx].buf_addr,
+   buf_vec[vec_idx].buf_len,
+   VHOST_ACCESS_RW);
if (unlikely(!desc_addr)) {
error = -1;
goto out;
@@ -637,6 +65

[dpdk-dev] [PATCH v3 13/19] vhost: enable rings at the right time

2017-10-05 Thread Maxime Coquelin
When VHOST_USER_F_PROTOCOL_FEATURES is negotiated, the ring is not
enabled when started, but enabled through dedicated
VHOST_USER_SET_VRING_ENABLE request.

When not negotiated, the ring is started in enabled state, at
VHOST_USER_SET_VRING_KICK request time.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c  | 6 --
 lib/librte_vhost/vhost_user.c | 9 +
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 6f243534e..0e2ad3322 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -158,12 +158,6 @@ init_vring_queue(struct virtio_net *dev, uint32_t 
vring_idx)
/* Backends are set to -1 indicating an inactive device. */
vq->backend = -1;
 
-   /*
-* always set the vq to enabled; this is to keep compatibility
-* with the old QEMU, whereas there is no SET_VRING_ENABLE message.
-*/
-   vq->enabled = 1;
-
TAILQ_INIT(&vq->zmbuf_list);
 }
 
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 0f23ea388..8aca7ef7e 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -699,6 +699,15 @@ vhost_user_set_vring_kick(struct virtio_net *dev, struct 
VhostUserMsg *pmsg)
"vring kick idx:%d file:%d\n", file.index, file.fd);
 
vq = dev->virtqueue[file.index];
+
+   /*
+* When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated,
+* the ring starts already enabled. Otherwise, it is enabled via
+* the SET_VRING_ENABLE message.
+*/
+   if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
+   vq->enabled = 1;
+
if (vq->kickfd >= 0)
close(vq->kickfd);
vq->kickfd = file.fd;
-- 
2.13.6



[dpdk-dev] [PATCH v3 11/19] vhost: introduce guest IOVA to backend VA helper

2017-10-05 Thread Maxime Coquelin
This patch introduces vhost_iova_to_vva() function to translate
guest's IO virtual addresses to backend's virtual addresses.

When IOMMU is enabled, the IOTLB cache is queried to get the
translation. If missing from the IOTLB cache, an IOTLB_MISS request
is sent to Qemu, and IOTLB cache is queried again on IOTLB event
notification.

When IOMMU is disabled, the passed address is a guest's physical
address, so the legacy rte_vhost_gpa_to_vva() API is used.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c | 24 
 lib/librte_vhost/vhost.h | 13 +
 2 files changed, 37 insertions(+)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 2493a7992..6f243534e 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -47,12 +47,36 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "iotlb.h"
 #include "vhost.h"
+#include "vhost_user.h"
 
 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
 
+uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue 
*vq,
+   uint64_t iova, uint64_t size, uint8_t perm)
+{
+   uint64_t vva, tmp_size;
+
+   if (unlikely(!size))
+   return 0;
+
+   tmp_size = size;
+
+   vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
+   if (tmp_size == size)
+   return vva;
+
+   if (!vhost_user_iotlb_pending_miss(vq, iova + tmp_size, perm)) {
+   vhost_user_iotlb_pending_insert(vq, iova + tmp_size, perm);
+   vhost_user_iotlb_miss(dev, iova + tmp_size, perm);
+   }
+
+   return 0;
+}
+
 struct virtio_net *
 get_device(int vid)
 {
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 8131bef9c..79351c66f 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -375,4 +375,17 @@ struct vhost_device_ops const 
*vhost_driver_callback_get(const char *path);
  */
 void vhost_backend_cleanup(struct virtio_net *dev);
 
+uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue 
*vq,
+   uint64_t iova, uint64_t size, uint8_t perm);
+
+static __rte_always_inline uint64_t
+vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   uint64_t iova, uint64_t size, uint8_t perm)
+{
+   if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+   return rte_vhost_gpa_to_vva(dev->mem, iova);
+
+   return __vhost_iova_to_vva(dev, vq, iova, size, perm);
+}
+
 #endif /* _VHOST_NET_CDEV_H_ */
-- 
2.13.6



[dpdk-dev] [PATCH v3 14/19] vhost: don't dereference invalid dev pointer after its reallocation

2017-10-05 Thread Maxime Coquelin
numa_realloc() reallocates the virtio_net device structure and
updates the vhost_devices[] table with the new pointer if the rings
are allocated on a different NUMA node.

Problem is that vhost_user_msg_handler() still dereferences old
pointer afterward.

This patch prevents this by fetching again the dev pointer in
vhost_devices[] after messages have been handled.

Cc: sta...@dpdk.org
Fixes: af295ad4698c ("vhost: realloc device and queues to same numa node as 
vring desc")
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost_user.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 8aca7ef7e..f495dd36e 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1198,6 +1198,12 @@ vhost_user_msg_handler(int vid, int fd)
 
}
 
+   /*
+* The virtio_net struct might have been reallocated on a different
+* NUMA node, so dev pointer might no more be valid.
+*/
+   dev = get_device(vid);
+
if (msg.flags & VHOST_USER_NEED_REPLY) {
msg.payload.u64 = !!ret;
msg.size = sizeof(msg.payload.u64);
-- 
2.13.6



Re: [dpdk-dev] [PATCH v3 2/4] net/mrvl: add mrvl net pmd driver

2017-10-05 Thread Tomasz Duszynski
On Wed, Oct 04, 2017 at 05:59:11PM +0100, Ferruh Yigit wrote:
> On 10/4/2017 9:59 AM, Tomasz Duszynski wrote:
> > On Wed, Oct 04, 2017 at 01:24:27AM +0100, Ferruh Yigit wrote:
> >> On 10/3/2017 12:51 PM, Tomasz Duszynski wrote:
> >>> Add support for the Marvell PPv2 (Packet Processor v2) 1/10 Gbps adapter.
> >>> Driver is based on external, publicly available, light-weight Marvell
> >>> MUSDK library that provides access to network packet processor.
> >>>
> >>> Driver comes with support for the following features:
> >>>
> >>> * Speed capabilities
> >>> * Link status
> >>> * Queue start/stop
> >>> * MTU update
> >>> * Jumbo frame
> >>> * Promiscuous mode
> >>> * Allmulticast mode
> >>> * Unicast MAC filter
> >>> * Multicast MAC filter
> >>> * RSS hash
> >>> * VLAN filter
> >>> * CRC offload
> >>> * L3 checksum offload
> >>> * L4 checksum offload
> >>> * Packet type parsing
> >>> * Basic stats
> >>> * Stats per queue
> >>
> >> I have more detailed comments but in high level,
> >> what do you think splitting this patch into three patches:
> >> - Skeleton
> >> - Add Rx/Tx support
> >> - Add features, like MTU update or Promiscuous etc.. support
> > If it's how submission process works then I think you left me with no
> > other option than splitting driver into nice patchset :).
>
> No, there is no defined submission process.
>
> > On the other
> > hand driver is really a wrapper to MUSDK library and thus quite easy to
> > follow. What are the benefits of such 3-way split?
>
> To help others review/understand your code. Big code chunks are scary
> and I believe most of details gets lost in big code chunks.
>
> When someone from community wants to understand and update/improve/fix
> your code, to help them by logically split the code that their focus can
> go into more narrow part.
>
> But this also means some effort in your side, so some kind of balance is
> required.
>
> I think splitting patch into smaller logical part is helpful for others,
> what do you think, is it too much effort?
>

Fair enough. I'll split the driver as suggested. A few specific
questions about functionality each patch should contain though.

As for skeleton, I see others just put driver probing here.

As for Rx/Tx support it seems that there's no common pattern.
Functionality like starting/stopping device, queues configuration
and all the other things related to Rx/Tx should be here as well?

What's left are features which go into features-patch.

> >>
> >>>
> >>> Driver was engineered cooperatively by Semihalf and Marvell teams.
> >>>
> >>> Semihalf:
> >>> Jacek Siuda 
> >>> Tomasz Duszynski 
> >>>
> >>> Marvell:
> >>> Dmitri Epshtein 
> >>> Natalie Samsonov 
> >>>
> >>> Signed-off-by: Jacek Siuda 
> >>> Signed-off-by: Tomasz Duszynski 
> >>
> >> <...>
> >>
> >>> +static struct rte_vdev_driver pmd_mrvl_drv = {
> >>> + .probe = rte_pmd_mrvl_probe,
> >>> + .remove = rte_pmd_mrvl_remove,
> >>> +};
> >>> +
> >>> +RTE_PMD_REGISTER_VDEV(net_mrvl, pmd_mrvl_drv);
> >>
> >> Please help me understand.
> >>
> >> This driver implemented as virtual driver, because:
> >> With the help of custom kernel modules, musdk library already provides
> >> userspace datapath support. This PMD is an interface to musdk library.
> >> Is this correct?
> > That is right. Another reason this NIC is not PCI device.
>
> We support more bus now :). Out of curiosity, which bus is device on?

Bus is called Aurora2. That's proprietary SoC interconnect fabric.

>
> >>
> >> If so, just thinking loud:
> >> - Why not implement this PMD directly on top of kernel interface,
> >> removing musdk layer completely?
> >> - How big problem that this PMD depends on custom kernel code?
> > I think the main reason is that MUSDK is already used in different projects.
> > Keeping multiple codebases offering similar functionality would be quite
> > demanding in terms of extra work needed.
> >> - How library and custom kernel code delivered? For which platforms?
> > Kernel and library sources are hosted on publicly available repository.
>
> I guess it would be nice to highlight custom kernel with external
> patches is required. This is not mentioned in "Prerequisites" section of
> the document.
>

ACK

> > Driver was tested on Armada 7k/8k SoCs.
>
> Can you please provide link to the HW mentioned in documentation?
>

You can find some info here:

https://www.marvell.com/embedded-processors/armada-70xx/
https://www.marvell.com/embedded-processors/armada-80xx/

> >>
> >> <>
> >>
> >
> > --
> > - Tomasz Duszyński
> >
>

--
- Tomasz Duszyński


[dpdk-dev] [PATCH v3 15/19] vhost: postpone rings addresses translation

2017-10-05 Thread Maxime Coquelin
This patch postpones rings addresses translations and checks, as
addresses sent by the master should not be interpreted as long as
ring is not started and enabled[0].

When protocol features aren't negotiated, the ring is started in
enabled state, so the addresses translations are postponed to
vhost_user_set_vring_kick().
Otherwise, it is postponed to when ring is enabled, in
vhost_user_set_vring_enable().

[0]: http://lists.nongnu.org/archive/html/qemu-devel/2017-05/msg04355.html

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.h  |  1 +
 lib/librte_vhost/vhost_user.c | 69 ++-
 2 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 79351c66f..903da5db5 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -125,6 +125,7 @@ struct vhost_virtqueue {
 
struct vring_used_elem  *shadow_used_ring;
uint16_tshadow_used_idx;
+   struct vhost_vring_addr ring_addrs;
 
struct batch_copy_elem  *batch_copy_elems;
uint16_tbatch_copy_nb_elems;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index f495dd36e..319867c65 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -356,6 +356,7 @@ static int
 vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
 {
struct vhost_virtqueue *vq;
+   struct vhost_vring_addr *addr = &msg->payload.addr;
 
if (dev->mem == NULL)
return -1;
@@ -363,35 +364,50 @@ vhost_user_set_vring_addr(struct virtio_net *dev, 
VhostUserMsg *msg)
/* addr->index refers to the queue index. The txq 1, rxq is 0. */
vq = dev->virtqueue[msg->payload.addr.index];
 
+   /*
+* Rings addresses should not be interpreted as long as the ring is not
+* started and enabled
+*/
+   memcpy(&vq->ring_addrs, addr, sizeof(*addr));
+
+   return 0;
+}
+
+static struct virtio_net *translate_ring_addresses(struct virtio_net *dev,
+   int vq_index)
+{
+   struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
+   struct vhost_vring_addr *addr = &vq->ring_addrs;
+
/* The addresses are converted from QEMU virtual to Vhost virtual. */
vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
-   msg->payload.addr.desc_user_addr);
+   addr->desc_user_addr);
if (vq->desc == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find desc ring address.\n",
dev->vid);
-   return -1;
+   return NULL;
}
 
-   dev = numa_realloc(dev, msg->payload.addr.index);
-   vq = dev->virtqueue[msg->payload.addr.index];
+   dev = numa_realloc(dev, vq_index);
+   vq = dev->virtqueue[vq_index];
 
vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
-   msg->payload.addr.avail_user_addr);
+   addr->avail_user_addr);
if (vq->avail == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find avail ring address.\n",
dev->vid);
-   return -1;
+   return NULL;
}
 
vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
-   msg->payload.addr.used_user_addr);
+   addr->used_user_addr);
if (vq->used == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find used ring address.\n",
dev->vid);
-   return -1;
+   return NULL;
}
 
if (vq->last_used_idx != vq->used->idx) {
@@ -403,7 +419,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, 
VhostUserMsg *msg)
vq->last_avail_idx = vq->used->idx;
}
 
-   vq->log_guest_addr = msg->payload.addr.log_guest_addr;
+   vq->log_guest_addr = addr->log_guest_addr;
 
LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
dev->vid, vq->desc);
@@ -414,7 +430,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, 
VhostUserMsg *msg)
LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
dev->vid, vq->log_guest_addr);
 
-   return 0;
+   return dev;
 }
 
 /*
@@ -685,10 +701,11 @@ vhost_user_set_vring_call(struct virtio_net *dev, struct 
VhostUserMsg *pmsg)
 }
 
 static void
-vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
 {
struct vhost_vring_file file;
struct vhost_virtqueue *vq;
+   struct virtio_net *dev = *pdev;
 
file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MA

[dpdk-dev] [PATCH v3 16/19] vhost-user: translate ring addresses when IOMMU enabled

2017-10-05 Thread Maxime Coquelin
When IOMMU is enabled, the ring addresses set by the
VHOST_USER_SET_VRING_ADDR requests are guest's IO virtual addresses,
whereas Qemu virtual addresses when IOMMU is disabled.

When enabled and the required translation is not in the IOTLB cache,
an IOTLB miss request is sent, but being called by the vhost-user
socket handling thread, the function does not wait for the requested
IOTLB update.

The function will be called again on the next IOTLB update message
reception if matching the vring addresses.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost_user.c | 44 +--
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 319867c65..90b209764 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -324,10 +324,7 @@ numa_realloc(struct virtio_net *dev, int index 
__rte_unused)
 }
 #endif
 
-/*
- * Converts QEMU virtual address to Vhost virtual address. This function is
- * used to convert the ring addresses to our address space.
- */
+/* Converts QEMU virtual address to Vhost virtual address. */
 static uint64_t
 qva_to_vva(struct virtio_net *dev, uint64_t qva)
 {
@@ -348,6 +345,30 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva)
return 0;
 }
 
+
+/*
+ * Converts ring address to Vhost virtual address.
+ * If IOMMU is enabled, the ring address is a guest IO virtual address,
+ * else it is a QEMU virtual address.
+ */
+static uint64_t
+ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   uint64_t ra, uint64_t size)
+{
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
+   uint64_t vva;
+
+   vva = vhost_user_iotlb_cache_find(vq, ra,
+   &size, VHOST_ACCESS_RW);
+   if (!vva)
+   vhost_user_iotlb_miss(dev, ra, VHOST_ACCESS_RW);
+
+   return vva;
+   }
+
+   return qva_to_vva(dev, ra);
+}
+
 /*
  * The virtio device sends us the desc, used and avail ring addresses.
  * This function then converts these to our address space.
@@ -380,8 +401,11 @@ static struct virtio_net *translate_ring_addresses(struct 
virtio_net *dev,
struct vhost_vring_addr *addr = &vq->ring_addrs;
 
/* The addresses are converted from QEMU virtual to Vhost virtual. */
-   vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
-   addr->desc_user_addr);
+   if (vq->desc && vq->avail && vq->used)
+   return dev;
+
+   vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
+   vq, addr->desc_user_addr, sizeof(struct vring_desc));
if (vq->desc == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find desc ring address.\n",
@@ -392,8 +416,8 @@ static struct virtio_net *translate_ring_addresses(struct 
virtio_net *dev,
dev = numa_realloc(dev, vq_index);
vq = dev->virtqueue[vq_index];
 
-   vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
-   addr->avail_user_addr);
+   vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
+   vq, addr->avail_user_addr, sizeof(struct vring_avail));
if (vq->avail == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find avail ring address.\n",
@@ -401,8 +425,8 @@ static struct virtio_net *translate_ring_addresses(struct 
virtio_net *dev,
return NULL;
}
 
-   vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
-   addr->used_user_addr);
+   vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
+   vq, addr->used_user_addr, sizeof(struct vring_used));
if (vq->used == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find used ring address.\n",
-- 
2.13.6



Re: [dpdk-dev] [PATCH v5 3/9] examples/vm_power_mgr: add vcpu to pcpu mapping

2017-10-05 Thread Hunt, David

Hi Santosh,


On 4/10/2017 4:58 PM, santosh wrote:

Hi David,


On Wednesday 04 October 2017 08:55 PM, David Hunt wrote:

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
  examples/vm_power_manager/channel_manager.c | 62 +
  examples/vm_power_manager/channel_manager.h | 25 
  2 files changed, 87 insertions(+)

diff --git a/examples/vm_power_manager/channel_manager.c 
b/examples/vm_power_manager/channel_manager.c
index e068ae2..03fa626 100644
--- a/examples/vm_power_manager/channel_manager.c
+++ b/examples/vm_power_manager/channel_manager.c
@@ -574,6 +574,68 @@ set_channel_status(const char *vm_name, unsigned 
*channel_list,
return num_channels_changed;
  }
  
+void

+get_all_vm(int *num_vm, int *num_cpu)
+{

nits:
s/*num_cpu/*num_vcpu


Sure. Makes it more readable.


+
+   virNodeInfo node_info;
+   virDomainPtr *domptr;
+   uint64_t mask;
+   int i, ii, numVcpus[MAX_VCPUS], cpu, n_vcpus;
+   unsigned int jj;
+   const char *vm_name;
+   unsigned int flags = VIR_CONNECT_LIST_DOMAINS_RUNNING |
+   VIR_CONNECT_LIST_DOMAINS_PERSISTENT;
+   unsigned int flag = VIR_DOMAIN_VCPU_CONFIG;
+

nits:
Perhaps add more clear name example:
s/flags/conn_flags
s/flag/domain_flags


domain_flags sounds good to me.


+
+   memset(global_cpumaps, 0, CHANNEL_CMDS_MAX_CPUS*global_maplen);
+   if (virNodeGetInfo(global_vir_conn_ptr, &node_info))
+   RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to retrieve node Info\n");
+

Should return from here.. since node info not retrieve ops errored out.


Sure.


+   /* Returns number of pcpus */
+   global_n_host_cpus = (unsigned int)node_info.cpus;
+
+   /* Returns number of active domains */
+   *num_vm = virConnectListAllDomains(global_vir_conn_ptr, &domptr, flags);
+   if (*num_vm <= 0)
+   RTE_LOG(ERR, CHANNEL_MANAGER, "No Active Domains Running\n");
+

ditto..


Sure.


+   for (i = 0; i < *num_vm; i++) {
+
+   /* Get Domain Names */
+   vm_name = virDomainGetName(domptr[i]);
+   lvm_info[i].vm_name = vm_name;
+
+   /* Get Number of Vcpus */
+   numVcpus[i] = virDomainGetVcpusFlags(domptr[i], flag);
+
+   /* Get Number of VCpus & VcpuPinInfo */
+   n_vcpus = virDomainGetVcpuPinInfo(domptr[i],
+   numVcpus[i], global_cpumaps,
+   global_maplen, flag);
+
+   if ((int)n_vcpus > 0) {
+   *num_cpu = n_vcpus;
+   lvm_info[i].num_cpus = n_vcpus;
+   }
+
+   /* Save pcpu in use by libvirt VMs */
+   for (ii = 0; ii < n_vcpus; ii++) {
+   mask = 0;
+   for (jj = 0; jj < global_n_host_cpus; jj++) {
+   if (VIR_CPU_USABLE(global_cpumaps,
+   global_maplen, ii, jj) > 0) {
+   mask |= 1ULL << jj;
+   }
+   }
+   ITERATIVE_BITMASK_CHECK_64(mask, cpu) {
+   lvm_info[i].pcpus[ii] = cpu;
+   }
+   }
+   }
+}
+
  int
  get_info_vm(const char *vm_name, struct vm_info *info)
  {
diff --git a/examples/vm_power_manager/channel_manager.h 
b/examples/vm_power_manager/channel_manager.h
index 47c3b9c..788c1e6 100644
--- a/examples/vm_power_manager/channel_manager.h
+++ b/examples/vm_power_manager/channel_manager.h
@@ -66,6 +66,17 @@ struct sockaddr_un _sockaddr_un;
  #define UNIX_PATH_MAX sizeof(_sockaddr_un.sun_path)
  #endif
  
+#define MAX_VMS 4

+#define MAX_VCPUS 20
+
+
+struct libvirt_vm_info {
+   const char *vm_name;
+   unsigned int pcpus[MAX_VCPUS];
+   uint8_t num_cpus;
+};
+
+struct libvirt_vm_info lvm_info[MAX_VMS];
  /* Communication Channel Status */
  enum channel_status { CHANNEL_MGR_CHANNEL_DISCONNECTED = 0,
CHANNEL_MGR_CHANNEL_CONNECTED,
@@ -319,6 +330,20 @@ int set_channel_status(const char *vm_name, unsigned 
*channel_list,
   */
  int get_info_vm(const char *vm_name, struct vm_info *info);
  
+/**

+ * Populates a table with all domains running and their physical cpu.
+ * All information is gathered through libvirt api.
+ *
+ * @param noVms
+ *  modified to store number of active VMs
+ *
+ * @param noVcpus
+modified to store number of vcpus active
+ *
+ * @return
+ *   void
+ */
+void get_all_vm(int *noVms, int *noVcpus);

nits: perhaps,
void
get_all_vm(int *num_vm, int *num_vcpu)
Thanks.


Agreed, what you suggest is a more common naming convention.

Thanks,
Dave.


  #ifdef __cplusplus
  }
  #endif




[dpdk-dev] [PATCH v3 17/19] vhost-user: iommu: postpone device creation until ring are mapped

2017-10-05 Thread Maxime Coquelin
Translating the start addresses of the rings is not enough, we need to
be sure the whole ring is made available by the guest.

It depends on the size of the rings, which is not known on SET_VRING_ADDR
reception. Furthermore, we need to be safe against vring page
invalidations.

This patch introduces a new access_ok flag per virtqueue, which is set
when all the rings are mapped, and cleared as soon as a page used by a
ring is invalidated. The invalidation part is implemented in a following
patch.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c  | 37 ++
 lib/librte_vhost/vhost.h  |  2 ++
 lib/librte_vhost/vhost_user.c | 62 +++
 lib/librte_vhost/virtio_net.c | 60 +
 4 files changed, 121 insertions(+), 40 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0e2ad3322..ef54835a6 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -135,6 +135,43 @@ free_device(struct virtio_net *dev)
rte_free(dev);
 }
 
+int
+vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+   uint64_t size;
+
+   if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+   goto out;
+
+   size = sizeof(struct vring_desc) * vq->size;
+   vq->desc = (struct vring_desc *)vhost_iova_to_vva(dev, vq,
+   vq->ring_addrs.desc_user_addr,
+   size, VHOST_ACCESS_RW);
+   if (!vq->desc)
+   return -1;
+
+   size = sizeof(struct vring_avail);
+   size += sizeof(uint16_t) * vq->size;
+   vq->avail = (struct vring_avail *)vhost_iova_to_vva(dev, vq,
+   vq->ring_addrs.avail_user_addr,
+   size, VHOST_ACCESS_RW);
+   if (!vq->avail)
+   return -1;
+
+   size = sizeof(struct vring_used);
+   size += sizeof(struct vring_used_elem) * vq->size;
+   vq->used = (struct vring_used *)vhost_iova_to_vva(dev, vq,
+   vq->ring_addrs.used_user_addr,
+   size, VHOST_ACCESS_RW);
+   if (!vq->used)
+   return -1;
+
+out:
+   vq->access_ok = 1;
+
+   return 0;
+}
+
 static void
 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 {
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 903da5db5..b3fe6bb8e 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -113,6 +113,7 @@ struct vhost_virtqueue {
/* Currently unused as polling mode is enabled */
int kickfd;
int enabled;
+   int access_ok;
 
/* Physical address of used ring, for logging */
uint64_tlog_guest_addr;
@@ -378,6 +379,7 @@ void vhost_backend_cleanup(struct virtio_net *dev);
 
 uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue 
*vq,
uint64_t iova, uint64_t size, uint8_t perm);
+int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
 
 static __rte_always_inline uint64_t
 vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 90b209764..dd6562fd8 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -391,6 +391,12 @@ vhost_user_set_vring_addr(struct virtio_net *dev, 
VhostUserMsg *msg)
 */
memcpy(&vq->ring_addrs, addr, sizeof(*addr));
 
+   vq->desc = NULL;
+   vq->avail = NULL;
+   vq->used = NULL;
+
+   vq->access_ok = 0;
+
return 0;
 }
 
@@ -407,10 +413,10 @@ static struct virtio_net *translate_ring_addresses(struct 
virtio_net *dev,
vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
vq, addr->desc_user_addr, sizeof(struct vring_desc));
if (vq->desc == 0) {
-   RTE_LOG(ERR, VHOST_CONFIG,
+   RTE_LOG(DEBUG, VHOST_CONFIG,
"(%d) failed to find desc ring address.\n",
dev->vid);
-   return NULL;
+   return dev;
}
 
dev = numa_realloc(dev, vq_index);
@@ -419,19 +425,19 @@ static struct virtio_net *translate_ring_addresses(struct 
virtio_net *dev,
vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
vq, addr->avail_user_addr, sizeof(struct vring_avail));
if (vq->avail == 0) {
-   RTE_LOG(ERR, VHOST_CONFIG,
+   RTE_LOG(DEBUG, VHOST_CONFIG,
"(%d) failed to find avail ring address.\n",
dev->vid);
-   return NULL;
+   return dev;
}
 
vq->u

[dpdk-dev] [PATCH v3 18/19] vhost: iommu: Invalidate vring in case of matching IOTLB invalidate

2017-10-05 Thread Maxime Coquelin
As soon as a page used by a ring is invalidated, the access_ok flag
is cleared, so that processing threads try to map them again.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c  | 14 ++
 lib/librte_vhost/vhost.h  |  1 +
 lib/librte_vhost/vhost_user.c | 38 +-
 3 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index ef54835a6..061f08a69 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -172,6 +172,20 @@ vring_translate(struct virtio_net *dev, struct 
vhost_virtqueue *vq)
return 0;
 }
 
+void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_wr_lock(vq);
+
+   vq->access_ok = 0;
+   vq->desc = NULL;
+   vq->avail = NULL;
+   vq->used = NULL;
+
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_wr_unlock(vq);
+}
+
 static void
 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 {
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index b3fe6bb8e..fb48f3012 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -380,6 +380,7 @@ void vhost_backend_cleanup(struct virtio_net *dev);
 uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue 
*vq,
uint64_t iova, uint64_t size, uint8_t perm);
 int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
+void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
 
 static __rte_always_inline uint64_t
 vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index dd6562fd8..f891f5741 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -391,11 +391,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, 
VhostUserMsg *msg)
 */
memcpy(&vq->ring_addrs, addr, sizeof(*addr));
 
-   vq->desc = NULL;
-   vq->avail = NULL;
-   vq->used = NULL;
-
-   vq->access_ok = 0;
+   vring_invalidate(dev, vq);
 
return 0;
 }
@@ -1012,6 +1008,35 @@ is_vring_iotlb_update(struct vhost_virtqueue *vq, struct 
vhost_iotlb_msg *imsg)
 }
 
 static int
+is_vring_iotlb_invalidate(struct vhost_virtqueue *vq,
+   struct vhost_iotlb_msg *imsg)
+{
+   uint64_t istart, iend, vstart, vend;
+
+   istart = imsg->iova;
+   iend = istart + imsg->size - 1;
+
+   vstart = (uint64_t)vq->desc;
+   vend = vstart + sizeof(struct vring_desc) * vq->size - 1;
+   if (vstart <= iend && istart <= vend)
+   return 1;
+
+   vstart = (uint64_t)vq->avail;
+   vend = vstart + sizeof(struct vring_avail);
+   vend += sizeof(uint16_t) * vq->size - 1;
+   if (vstart <= iend && istart <= vend)
+   return 1;
+
+   vstart = (uint64_t)vq->used;
+   vend = vstart + sizeof(struct vring_used);
+   vend += sizeof(struct vring_used_elem) * vq->size - 1;
+   if (vstart <= iend && istart <= vend)
+   return 1;
+
+   return 0;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
 {
struct virtio_net *dev = *pdev;
@@ -1041,6 +1066,9 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct 
VhostUserMsg *msg)
 
vhost_user_iotlb_cache_remove(vq, imsg->iova,
imsg->size);
+
+   if (is_vring_iotlb_invalidate(vq, imsg))
+   vring_invalidate(dev, vq);
}
break;
default:
-- 
2.13.6



[dpdk-dev] [PATCH v3 19/19] vhost: enable IOMMU support

2017-10-05 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 doc/guides/rel_notes/release_17_11.rst | 4 
 lib/librte_vhost/vhost.h   | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_17_11.rst 
b/doc/guides/rel_notes/release_17_11.rst
index 170f4f916..c0fc4ac7f 100644
--- a/doc/guides/rel_notes/release_17_11.rst
+++ b/doc/guides/rel_notes/release_17_11.rst
@@ -41,6 +41,10 @@ New Features
  Also, make sure to start the actual text at the margin.
  =
 
+* **Added IOMMU support to libvhost-user**
+
+  Implemented device IOTLB in Vhost-user backend, and enabled Virtio's IOMMU
+  feature.
 
 Resolved Issues
 ---
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index fb48f3012..598c65b56 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -211,7 +211,8 @@ struct vhost_msg {
(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
-   (1ULL << VIRTIO_NET_F_MTU))
+   (1ULL << VIRTIO_NET_F_MTU) | \
+   (1ULL << VIRTIO_F_IOMMU_PLATFORM))
 
 
 struct guest_page {
-- 
2.13.6



Re: [dpdk-dev] [PATCH v5 4/9] examples/vm_power_mgr: add scale to medium freq fn

2017-10-05 Thread Hunt, David

Hi Santosh,


On 4/10/2017 5:04 PM, santosh wrote:

Hi David,


On Wednesday 04 October 2017 08:55 PM, David Hunt wrote:

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---
  examples/vm_power_manager/power_manager.c | 15 +++
  examples/vm_power_manager/power_manager.h | 13 +
  2 files changed, 28 insertions(+)

diff --git a/examples/vm_power_manager/power_manager.c 
b/examples/vm_power_manager/power_manager.c
index 80705f9..c021c1d 100644
--- a/examples/vm_power_manager/power_manager.c
+++ b/examples/vm_power_manager/power_manager.c
@@ -286,3 +286,18 @@ power_manager_disable_turbo_core(unsigned int core_num)
POWER_SCALE_CORE(disable_turbo, core_num, ret);
return ret;
  }
+
+int
+power_manager_scale_core_med(unsigned int core_num)
+{
+   int ret = 0;
+
+   if (core_num >= POWER_MGR_MAX_CPUS)
+   return -1;
+   if (!(global_enabled_cpus & (1ULL << core_num)))
+   return -1;
+   rte_spinlock_lock(&global_core_freq_info[core_num].power_sl);
+   ret = rte_power_set_freq(core_num, 5);

nits:
what is 5? also should be enum or macro.

Thanks.



This probably shouldn't be hard-coded. The intention is to select a 
middle frequency. I can add a helper function to get the value

that is halfway between min and max, and use that instead.

Thanks,
Dave.





Re: [dpdk-dev] [PATCH v2 03/12] cryptodev: extend cryptodev to support security APIs

2017-10-05 Thread De Lara Guarch, Pablo


> -Original Message-
> From: Akhil Goyal [mailto:akhil.go...@nxp.com]
> Sent: Tuesday, October 3, 2017 2:14 PM
> To: dev@dpdk.org
> Cc: Doherty, Declan ; De Lara Guarch, Pablo
> ; hemant.agra...@nxp.com; Nicolau,
> Radu ; bor...@mellanox.com;
> avia...@mellanox.com; tho...@monjalon.net; sandeep.ma...@nxp.com;
> jerin.ja...@caviumnetworks.com; Mcnamara, John
> ; olivier.m...@6wind.com
> Subject: [PATCH v2 03/12] cryptodev: extend cryptodev to support security
> APIs
> 

I would change the title to something a bit shorter "cryptodev: support 
security APIs"?
No need to duplicate "cryptodev".

> security ops are added to crypto device to support protocol offloaded
> security operations.
> 
> Signed-off-by: Akhil Goyal 
> Signed-off-by: Declan Doherty 
> ---

...

> diff --git a/lib/librte_cryptodev/rte_cryptodev.c
> b/lib/librte_cryptodev/rte_cryptodev.c
> index 327d7e8..e52fc88 100644
> --- a/lib/librte_cryptodev/rte_cryptodev.c
> +++ b/lib/librte_cryptodev/rte_cryptodev.c
> @@ -488,6 +488,16 @@ rte_cryptodev_devices_get(const char
> *driver_name, uint8_t *devices,
>   return count;
>  }
> 
> +uint16_t
> +rte_cryptodev_get_sec_id(uint8_t dev_id) {
> + if (rte_crypto_devices[dev_id].feature_flags &
> + RTE_CRYPTODEV_FF_SECURITY)
> + return rte_crypto_devices[dev_id].data->sec_id;
> +
> + return -1;

The return value of this function is uint16_t, but you are returning a -1
if the crypto device does not support security sessions.

> +}
> +
>  int
>  rte_cryptodev_socket_id(uint8_t dev_id)  { diff --git
> a/lib/librte_cryptodev/rte_cryptodev.h
> b/lib/librte_cryptodev/rte_cryptodev.h
> index 7ec9c4b..867b5be 100644
> --- a/lib/librte_cryptodev/rte_cryptodev.h
> +++ b/lib/librte_cryptodev/rte_cryptodev.h
> @@ -51,8 +51,6 @@ extern "C" {
>  #include 
>  #include 
> 
> -extern const char **rte_cyptodev_names;
> -

I think this removal deserves another patch, separate from this patchset.

>  /* Logging Macros */
> 
>  #define CDEV_LOG_ERR(...) \
> @@ -351,6 +349,8 @@ rte_cryptodev_get_aead_algo_enum(enum
> rte_crypto_aead_algorithm *algo_enum,  /**< Utilises CPU NEON
> instructions */
>  #define  RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 11)
>  /**< Utilises ARM CPU Cryptographic Extensions */
> +#define  RTE_CRYPTODEV_FF_SECURITY   (1ULL << 12)
> +/**< Support Security Protocol Processing */
> 
> 
>  /**
> @@ -761,6 +761,9 @@ struct rte_cryptodev {
>   /**< Flag indicating the device is attached */  }
> __rte_cache_aligned;
> 
> +uint16_t
> +rte_cryptodev_get_sec_id(uint8_t dev_id);
> +
>  /**
>   *
>   * The data part, with no function pointers, associated with each device.
> @@ -789,6 +792,7 @@ struct rte_cryptodev_data {
> 
>   void *dev_private;
>   /**< PMD-specific private data */
> + uint16_t sec_id;

Add a description about "sec_id".

>  } __rte_cache_aligned;
> 



[dpdk-dev] [dpdk-announce] CFP Deadline for DPDK Summit North America

2017-10-05 Thread O'Driscoll, Tim
The original CFP deadline for our DPDK Summit North America expired yesterday. 
This has now been extended to Friday October 13th (note that the website 
currently says Wednesday 13th but that's just a typo and will be corrected). 
So, if you were planning to submit a proposal but didn't make the deadline 
yesterday, there's still time to do this.

Details of the event, including registration and CFP info, are available at: 
http://events.linuxfoundation.org/events/dpdk-north-america-summit.

This year, the annual OVS event is on the following two days (November 16th & 
17th), in the same venue. Details of the OVS event are available at: 
http://events.linuxfoundation.org/events/open-vswitch-fall-event.

We've been talking to the LF and to the OVS team about taking advantage of this 
by having some joint sessions on OVS-DPDK, and on DPDK-accelerated vswitches in 
general, on the afternoon of Wednesday 15th. It would be good to get some 
proposals on this before the new CFP deadline at the end of next week.




Re: [dpdk-dev] [PATCH v2 05/12] lib/librte_mbuf: add security crypto flags and mbuf fields

2017-10-05 Thread De Lara Guarch, Pablo


> -Original Message-
> From: Akhil Goyal [mailto:akhil.go...@nxp.com]
> Sent: Tuesday, October 3, 2017 2:14 PM
> To: dev@dpdk.org
> Cc: Doherty, Declan ; De Lara Guarch, Pablo
> ; hemant.agra...@nxp.com; Nicolau,
> Radu ; bor...@mellanox.com;
> avia...@mellanox.com; tho...@monjalon.net; sandeep.ma...@nxp.com;
> jerin.ja...@caviumnetworks.com; Mcnamara, John
> ; olivier.m...@6wind.com
> Subject: [PATCH v2 05/12] lib/librte_mbuf: add security crypto flags and
> mbuf fields
> 
> From: Boris Pismenny 
> 

Quick comment about the commit message:
1 - For libraries, there is no need to use "lib/librte...".
The convention is to directly use the library name:
In this case: "mbuf: add security...". Same applies to patch 4.
Also, make sure that the commit message starts with capital letter.

Thanks,
Pablo

> add security crypto flags and update mbuf fields to support IPsec crypto
> offload for transmitted packets, and to indicate crypto result for received
> packets.
> 



Re: [dpdk-dev] [PATCH v4 3/4] eventdev: Add eventdev ethernet Rx adapter

2017-10-05 Thread Rao, Nikhil

On 10/2/2017 4:09 PM, Jerin Jacob wrote:

-Original Message-

Date: Mon, 2 Oct 2017 15:58:56 +0530
From: "Rao, Nikhil" 
To: Jerin Jacob 
CC: bruce.richard...@intel.com, gage.e...@intel.com, dev@dpdk.org,
  tho...@monjalon.net, harry.van.haa...@intel.com, hemant.agra...@nxp.com,
  nipun.gu...@nxp.com, narender.vang...@intel.com,
  erik.g.carri...@intel.com, abhinandan.guj...@intel.com,
  santosh.shu...@caviumnetworks.com
Subject: Re: [PATCH v4 3/4] eventdev: Add eventdev ethernet Rx adapter
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101
  Thunderbird/52.3.0

On 9/25/2017 8:29 AM, Rao, Nikhil wrote:

On 9/24/2017 11:46 PM, Rao, Nikhil wrote:

On 9/22/2017 2:40 PM, Jerin Jacob wrote:


When we worked on a prototype, we figured out that we need a
separate event type
for RX adapter. Probably RTE_EVENT_TYPE_ETHDEV_RX_ADAPTER?
The Reason is:
- In the HW based Rx adapter case, the packet are coming
directly to eventdev once it is configured.
- So on a HW implementation of the event dequeue(), CPU needs to
convert HW specific
metadata to mbuf
- The event dequeue() is used in two cases
a) octeontx eventdev driver used with any external NIC
b) octeontx eventdev driver used with integrated NIC(without service
core to inject the packet)
We need some identifier to understand case (a) and (b).So, in
dequeue(), if the
packet is from RTE_EVENT_TYPE_ETHDEV then we can do "HW specific
metadata" to mbuf
conversion and in another case (!RTE_EVENT_TYPE_ETHDEV) result
in no mbuf
conversion.

Application can check if it is an Ethernet type event by
ev.event_type == RTE_EVENT_TYPE_ETHDEV || ev.event_type ==
RTE_EVENT_TYPE_ETHDEV_RX_ADAPTER



As per my understanding, the case (a) uses an in built port
Is it possible for the eventdev PMD to do the conversion based off
the eventdev port ?



I realized the dequeue wouldn't have knowledge of the port the event was
injected from, the application shouldn't have to see the difference
between case (a) & (b).

Would it be possible to use the impl_opaque field within struct rte_event ?

Nikhil


Hi Jerin,

Any further thoughts on this ?


impl_opaque field could be one option. But I think, NXP driver is using
it for internal operation. So overriding it from Rx adapter will cause
issue. How about adding new event type? So it gets a new name space so no
collision.

➜ [master][dpdk-next-eventdev] $ git diff
diff --git a/lib/librte_eventdev/rte_eventdev.h
b/lib/librte_eventdev/rte_eventdev.h
index ec7aabd9a..b33423c7e 100644
--- a/lib/librte_eventdev/rte_eventdev.h
+++ b/lib/librte_eventdev/rte_eventdev.h
@@ -878,6 +878,8 @@ rte_event_dev_close(uint8_t dev_id);
  /**< The event generated from cpu for pipelining.
   * Application may use *sub_event_type* to further classify the event
   */
+#define RTE_EVENT_TYPE_ETHDEV_ADAPTER   0x4
+/**< The event generated from ethdev Rx adapter */
  #define RTE_EVENT_TYPE_MAX  0x10
  /**< Maximum number of event types */

The event source is really RTE_EVENT_TYPE_CPU here, but is the 
assumption that the RTE_EVENT_TYPE_CPU name space is owned by the 
application (it's actions are driven by a combination of event type and 
event sub type) and extending the event source count is the only option 
here.


Nikhil


Re: [dpdk-dev] API in dpdk to get total free physical memory

2017-10-05 Thread Burakov, Anatoly

On 05-Oct-17 6:56 AM, Venumadhav Josyula wrote:

Hi All,

Like 'rte_eal_get_physmem_size' api to the total size of the physical memory. 
Is there an API to get the total free physical memory available?

We want such API we are planning to implement such API for the same

/* get the total size of memory */
uint64_t
rte_eal_get_physmem_free(int socket_id)
{
 const struct rte_mem_config *mcfg;
 unsigned i = 0;
 uint64_t total_len = 0;

 /* get pointer to global configuration */
 mcfg = rte_eal_get_configuration()->mem_config;

 for (i = 0; i < RTE_MAX_MEMSEG; i++) {
 if (mcfg->free_memseg[i].addr == NULL)
  break;

 if (mcfg->free_memseg[i].len == 0)
 continue;

 /* bad socket ID */
 if (socket_id != SOCKET_ID_ANY &&
 mcfg->free_memseg[i].socket_id != SOCKET_ID_ANY 
&&
 socket_id != 
mcfg->free_memseg[i].socket_id)
 continue;

 total_len += mcfg->free_memseg[i].len;
 }

 return total_len;
}

Thanks,
Regards
Venu


Hi Venu,

I don't think there is such an API, so you're welcome to submit a patch.

--
Thanks,
Anatoly


Re: [dpdk-dev] [PATCH v5 4/9] examples/vm_power_mgr: add scale to medium freq fn

2017-10-05 Thread santosh
Hi David,


On Thursday 05 October 2017 02:17 PM, Hunt, David wrote:
> Hi Santosh,
>
>
> On 4/10/2017 5:04 PM, santosh wrote:
>> Hi David,
>>
>>
>> On Wednesday 04 October 2017 08:55 PM, David Hunt wrote:
>>> Signed-off-by: Nemanja Marjanovic 
>>> Signed-off-by: Rory Sexton 
>>> Signed-off-by: David Hunt 
>>> ---
>>>   examples/vm_power_manager/power_manager.c | 15 +++
>>>   examples/vm_power_manager/power_manager.h | 13 +
>>>   2 files changed, 28 insertions(+)
>>>
>>> diff --git a/examples/vm_power_manager/power_manager.c 
>>> b/examples/vm_power_manager/power_manager.c
>>> index 80705f9..c021c1d 100644
>>> --- a/examples/vm_power_manager/power_manager.c
>>> +++ b/examples/vm_power_manager/power_manager.c
>>> @@ -286,3 +286,18 @@ power_manager_disable_turbo_core(unsigned int core_num)
>>>   POWER_SCALE_CORE(disable_turbo, core_num, ret);
>>>   return ret;
>>>   }
>>> +
>>> +int
>>> +power_manager_scale_core_med(unsigned int core_num)
>>> +{
>>> +int ret = 0;
>>> +
>>> +if (core_num >= POWER_MGR_MAX_CPUS)
>>> +return -1;
>>> +if (!(global_enabled_cpus & (1ULL << core_num)))
>>> +return -1;
>>> +rte_spinlock_lock(&global_core_freq_info[core_num].power_sl);
>>> +ret = rte_power_set_freq(core_num, 5);
>> nits:
>> what is 5? also should be enum or macro.
>>
>> Thanks.
>>
>
> This probably shouldn't be hard-coded. The intention is to select a middle 
> frequency. I can add a helper function to get the value
> that is halfway between min and max, and use that instead.
>
I'm ok with your proposition.
Thanks.

> Thanks,
> Dave.
>
>
>



Re: [dpdk-dev] [PATCH v2 0/2] ethdev: add support for raw flow type for flow director

2017-10-05 Thread Rybalchenko, Kirill
Hi Thomas,

As Ferruh rightly said, this feature does not affect any
other drivers except i40e.
It was implemented as a temporary measure to accelerate
adoption of new protocols in DPDK.
It does not eliminate importance of rte_flow.
As you can see, we've already added GTP as a part of rte_flow.
(see Beilei Xing patch http://dpdk.org/ml/archives/dev/2017-October/077483.html)

Regards,
Kirill.

> -Original Message-
> From: Yigit, Ferruh
> Sent: Wednesday 4 October 2017 20:47
> To: Thomas Monjalon ; Rybalchenko, Kirill
> 
> Cc: dev@dpdk.org; Chilikin, Andrey ; Xing, Beilei
> ; Wu, Jingjing 
> Subject: Re: [dpdk-dev] [PATCH v2 0/2] ethdev: add support for raw flow
> type for flow director
> 
> On 10/4/2017 6:56 PM, Thomas Monjalon wrote:
> > 04/10/2017 19:44, Ferruh Yigit:
> >> On 10/4/2017 5:57 PM, Thomas Monjalon wrote:
> >>> 03/10/2017 21:02, Ferruh Yigit:
>  On 9/20/2017 9:42 AM, Kirill Rybalchenko wrote:
> > For complex packets use raw flow type with pre-constructed packet
> > buffer instead of creating a packet internally in PMD.
> >>>
> >>> Sorry for not catching this series before.
> >>>
> >>> As it has been said several times on this mailing list, the flow
> >>> director API is deprecated.
> >>> I think everybody here knows that it is going to be replaced by
> >>> rte_flow.
> >>>
> >>> That's why it does not make sense to extend flow director.
> >>> We are not going to update PMDs to support a new type of legacy flow
> >>> director.
> >>> Please focus your efforts on rte_flow.
> >>
> >> As far as I can see this is not to to extend flow director. But
> >> driver uses this struct and adding a new feature into driver requires
> >> update in this struct.
> >>
> >> I guess idea was for new filter functionalities PMD should use
> >> rte_flow, that is the new shiny method we have, I see the point here.
> >> But I don't see the point of making these structs under use completely
> immutable.
> >
> > I don't know what is RTE_ETH_FLOW_RAW.
> > Let's start by explaining it, how it is used by users, and why this
> > struct is needed.
> 
> Let me answer as much as I get from patches, if something is missing or
> wrong Kirill needs to correct it.
> 
> Driver (i40e) works with static pre-defined flow and pctypes. But new
> feature DDP lets loading custom profiles and work with custom and perhaps
> not yet defined flow and pctypes. So there are a few other patches to make
> driver more dynamic.
> 
> For this case I40E_VALID_FLOW() fails with custom flow types, and
> RTE_ETH_FLOW_RAW used as kind of (void *) to be able to work with new
> dynamic types.
> 
> > Thanks
> >



Re: [dpdk-dev] [PATCH v2 0/9] Add support for AES-CCM

2017-10-05 Thread Zhang, Roy Fan


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Pablo de Lara
> Sent: Thursday, September 21, 2017 2:11 PM
> To: Doherty, Declan ; Trahe, Fiona
> ; Jain, Deepak K ; Griffin,
> John 
> Cc: dev@dpdk.org; De Lara Guarch, Pablo 
> Subject: [dpdk-dev] [PATCH v2 0/9] Add support for AES-CCM
> 
> AES-CCM support is added in the OpenSSL and QAT PMDs.
> The PMDs and the test code have been reworked, to avoid duplications with
> AES-GCM code, as both algorithms are quite similar (both are AEAD
> algorithms).
> 
> Also, an optimization for AES-GCM (and AES-CCM after the last patch) has
> been introduced, initializing the OpenSSL Context with the key, at session
> creation, instead of for each operation.
> 
> Changes in v2:
> - Clarified API for AES-CCM
> - Modified OpenSSL PMD and sample apps to comply with API
> - Added support for AES-CCM in QAT
> - Extended test cases for 192 and 256 bit keys

Series Acked-by: Fan Zhang 


Re: [dpdk-dev] [PATCH v2 11/12] crypto/dpaa2_sec: add support for protocol offload ipsec

2017-10-05 Thread De Lara Guarch, Pablo


> -Original Message-
> From: Akhil Goyal [mailto:akhil.go...@nxp.com]
> Sent: Tuesday, October 3, 2017 2:14 PM
> To: dev@dpdk.org
> Cc: Doherty, Declan ; De Lara Guarch, Pablo
> ; hemant.agra...@nxp.com; Nicolau,
> Radu ; bor...@mellanox.com;
> avia...@mellanox.com; tho...@monjalon.net; sandeep.ma...@nxp.com;
> jerin.ja...@caviumnetworks.com; Mcnamara, John
> ; olivier.m...@6wind.com
> Subject: [PATCH v2 11/12] crypto/dpaa2_sec: add support for protocol
> offload ipsec
> 
> driver implementation to support rte_security APIs
> 
> Signed-off-by: Akhil Goyal 

...

> +/**
> + * Checksum
> + *
> + * @param buffer calculate chksum for buffer
> + * @param lenbuffer length
> + *
> + * @return checksum value in host cpu order  */ static inline uint16_t

Tiny comment. Return type should be in a separate line.

> +calc_chksum(void *buffer, int len) {



Re: [dpdk-dev] [PATCH v4 2/9] lib/librte_power: add extra msg type for policies

2017-10-05 Thread santosh
Hi David,


On Thursday 05 October 2017 02:08 PM, Hunt, David wrote:
>
> Hi Santosh,
>
> On 4/10/2017 4:36 PM, santosh wrote:
>> Hi David,
>>
>>
>> On Wednesday 04 October 2017 02:45 PM, David Hunt wrote:
>>> Signed-off-by: Nemanja Marjanovic 
>>> Signed-off-by: Rory Sexton 
>>> Signed-off-by: David Hunt 
>>> ---
>> my 2cent:
>> General comment on implementation approach:
>> IMO, we should avoid PMD details in common lib area.
>> example: file channel_commons.h has ifdef clutter referencing
>> i40e pmds all over.
>>
>> Perhaps we should introduce opaque handle example void * or introduce pmd
>> specific callback/handle which points to PMD specific metadata in power 
>> library.
>>
>> Example:
>> struct channel_packet {
>>void *pmd_specific_metadata;
>> }
>>
>> Or someway via callback (I'm not sure at the moment)
>> so that we could hide PMD details in common area.
>>
>> Thanks.
>
> I would agree that PMD specific details are good left to the PMDs, however I 
> think that the initial
> example should be OK as is, and as new PMDs are added, we can find 
> commonality between them
> which stays in the example, and any really specific stuff can be pushed back 
> behind an opaque.
>
> What about the v5 I submitted (without the #ifdef's)? Are you OK with that 
> for this release, and we can
> fine tune as other PMDS are added in future releases?
>
Yes. But in future releases, we should do more code clean up in power lib and 
example area..
meaning; current example implementation uses names like _vsi.. specific to 
intel NICs,
we should remove such naming and their dependency code from example area.

Thanks.

> Regards,
> Dave.
>
>



[dpdk-dev] [RFC] sched: parameterize QoS traffic-classes and queues

2017-10-05 Thread alangordondewar
From: Alan Dewar 

The DPDK QoS framework has hierarchy of QoS scheduling elements: port,
subport, pipe, traffic-class and queue.  The first two levels of the
hierarchy are flexible (port and subport) in the number child nodes
that each parent can have, but from the pipe layer down the number of
child nodes is hard-coded as four.

These proposed changes allow these hard-coded limits to be modified by
changing a couple of compile-time constants.

The default configuration remains as four TCs and four queues.

The sched_autotest passes successfully with the default configuration.

Real world testing has included 2 x 4, 4 x 4 and 4 x 8 (TCs x queues)
configurations.

Signed-off-by: Alan Dewar 
---
 lib/librte_sched/rte_sched.c| 412 
 lib/librte_sched/rte_sched.h|  27 ++-
 lib/librte_sched/rte_sched_common.h |  16 ++
 3 files changed, 268 insertions(+), 187 deletions(-)

diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index b7cba11..d540553 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -65,8 +65,7 @@
 #endif
 
 #define RTE_SCHED_TB_RATE_CONFIG_ERR  (1e-7)
-#define RTE_SCHED_WRR_SHIFT   3
-#define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE)
+#define RTE_SCHED_GRINDER_PCACHE_SIZE 4
 #define RTE_SCHED_PIPE_INVALIDUINT32_MAX
 #define RTE_SCHED_BMP_POS_INVALID UINT32_MAX
 
@@ -165,12 +164,12 @@ enum grinder_state {
  * by scheduler enqueue.
  */
 struct rte_sched_port_hierarchy {
-   uint16_t queue:2;/**< Queue ID (0 .. 3) */
-   uint16_t traffic_class:2;/**< Traffic class ID (0 .. 3)*/
-   uint32_t color:2;/**< Color */
-   uint16_t unused:10;
-   uint16_t subport;/**< Subport ID */
-   uint32_t pipe;   /**< Pipe ID */
+   uint16_t queue:RTE_SCHED_WRR_SHIFT;/**< Queue ID */
+   uint16_t traffic_class:RTE_SCHED_TC_SHIFT; /**< Traffic class ID */
+   uint16_t color:2;  /**< Color */
+   uint32_t unused:16 - (2 + RTE_SCHED_WRR_SHIFT + RTE_SCHED_TC_SHIFT);
+   uint16_t subport;  /**< Subport ID */
+   uint32_t pipe; /**< Pipe ID */
 };
 
 struct rte_sched_grinder {
@@ -196,9 +195,9 @@ struct rte_sched_grinder {
 
/* Current TC */
uint32_t tc_index;
-   struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-   struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-   uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+   struct rte_sched_queue *queue[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+   struct rte_mbuf **qbase[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+   uint32_t qindex[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
uint16_t qsize;
uint32_t qmask;
uint32_t qpos;
@@ -219,7 +218,7 @@ struct rte_sched_port {
uint32_t frame_overhead;
uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
uint32_t n_pipe_profiles;
-   uint32_t pipe_tc3_rate_max;
+   uint32_t pipe_low_prio_tc_rate_max;
 #ifdef RTE_SCHED_RED
struct rte_red_config 
red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS];
 #endif
@@ -289,8 +288,8 @@ rte_sched_port_queues_per_port(struct rte_sched_port *port)
 static inline struct rte_mbuf **
 rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex)
 {
-   uint32_t pindex = qindex >> 4;
-   uint32_t qpos = qindex & 0xF;
+   uint32_t pindex = qindex >> RTE_SCHED_TC_WRR_SHIFT;
+   uint32_t qpos = qindex & RTE_SCHED_TC_WRR_MASK;
 
return (port->queue_array + pindex *
port->qsize_sum + port->qsize_add[qpos]);
@@ -299,7 +298,7 @@ rte_sched_port_qbase(struct rte_sched_port *port, uint32_t 
qindex)
 static inline uint16_t
 rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
 {
-   uint32_t tc = (qindex >> 2) & 0x3;
+   uint32_t tc = (qindex >> RTE_SCHED_WRR_SHIFT) & RTE_SCHED_TC_MASK;
 
return port->qsize[tc];
 }
@@ -373,7 +372,7 @@ rte_sched_port_check_params(struct rte_sched_port_params 
*params)
return -13;
 
 #ifdef RTE_SCHED_SUBPORT_TC_OV
-   /* TC3 oversubscription weight: non-zero */
+   /* Lowest priority TC oversubscription weight: non-zero */
if (p->tc_ov_weight == 0)
return -14;
 #endif
@@ -471,43 +470,81 @@ rte_sched_port_get_memory_footprint(struct 
rte_sched_port_params *params)
 static void
 rte_sched_port_config_qsize(struct rte_sched_port *port)
 {
-   /* TC 0 */
-   port->qsize_add[0] = 0;
-   port->qsize_add[1] = port->qsize_add[0] + port->qsize[0];
-   port->qsize_add[2] = port->qsize_add[1] + port->qsize[0];
-   port->qsize_add[3] = port->qsize_add[2] + port->qsize[0];
-
-   /* TC 1 */
-  

Re: [dpdk-dev] [PATCH v5 1/9] net/i40e: add API to convert VF MAC to VF id

2017-10-05 Thread santosh

On Thursday 05 October 2017 02:01 PM, Hunt, David wrote:
> Hi Santosh,
>
> On 4/10/2017 4:41 PM, santosh wrote:
>> Hi David,
>>
>>
>> On Wednesday 04 October 2017 08:55 PM, David Hunt wrote:
>>> From: "Sexton, Rory" 
>>>
>>> Need a way to convert a vf id to a pf id on the host so as to query the pf
>>> for relevant statistics which are used for the frequency changes in the
>>> vm_power_manager app. Used when profiles are passed down from the guest
>>> to the host, allowing the host to map the vfs to pfs.
>>>
>>> Signed-off-by: Nemanja Marjanovic 
>>> Signed-off-by: Rory Sexton 
>>> Signed-off-by: David Hunt 
>>> ---
>> I see that you just now sent out v5;)
>> But I guess v4 comment on this patch [1]
>> is still applicable (imo).
>> Thanks.
>>
>> [1] http://dpdk.org/dev/patchwork/patch/29577/
>
> The v5 went out just as you were commenting on v4. :)
>
> I agree that your comment above needs addressing, I'll do that in v6 today.
>
Thanks.

> Regards.
> Dave.
>



Re: [dpdk-dev] [PATCH 09/10] app/test-crypto-perf: fix memory leak

2017-10-05 Thread De Lara Guarch, Pablo


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Olivier Matz
> Sent: Monday, September 11, 2017 4:14 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 09/10] app/test-crypto-perf: fix memory leak
> 
> data is allocated but never freed.
> 
> Fixes: f8be1786b1b8 ("app/crypto-perf: introduce performance test
> application")
> 
> Signed-off-by: Olivier Matz 

Acked-by: Pablo de Lara 


Re: [dpdk-dev] [PATCH] virtio: use pointer to replace memcpy

2017-10-05 Thread Zhang, Roy Fan


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Zhiyong Yang
> Sent: Friday, August 11, 2017 3:13 AM
> To: dev@dpdk.org
> Cc: maxime.coque...@redhat.com; y...@fridaylinux.org; Yang, Zhiyong
> 
> Subject: [dpdk-dev] [PATCH] virtio: use pointer to replace memcpy
> 
> To use pointer instead of memcpy can save many cycles in the function
> virtio_send_command.
> 
> cc: maxime.coque...@redhat.com
> cc: y...@fridaylinux.org
> 
> Signed-off-by: Zhiyong Yang 
> ---
>  drivers/net/virtio/virtio_ethdev.c | 7 +++
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 

Acked-by: Fan Zhang 


[dpdk-dev] [PATCH v4 2/7] net/mlx4: restore full Rx support bypassing Verbs

2017-10-05 Thread Ophir Munk
From: Moti Haimovsky 

This patch adds support for accessing the hardware directly when handling
Rx packets eliminating the need to use Verbs in the Rx data path.

The number of scatters is limited to one.

Signed-off-by: Vasily Philipov 
Signed-off-by: Moti Haimovsky 
Signed-off-by: Ophir Munk 
---
 drivers/net/mlx4/mlx4_rxq.c   | 110 +++--
 drivers/net/mlx4/mlx4_rxtx.c  | 223 +++---
 drivers/net/mlx4/mlx4_rxtx.h  |  18 ++--
 drivers/net/mlx4/mlx4_utils.h |  20 
 4 files changed, 212 insertions(+), 159 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 409983f..9b98d86 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -51,6 +51,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include 
 #include 
 #include 
 #include 
@@ -77,20 +78,17 @@
 mlx4_rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n)
 {
unsigned int i;
-   struct rxq_elt (*elts)[elts_n] =
-   rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq->socket);
+   struct rte_mbuf *(*elts)[elts_n] =
+   rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, rxq->socket);
 
+   assert(rte_is_power_of_2(elts_n));
if (elts == NULL) {
rte_errno = ENOMEM;
ERROR("%p: can't allocate packets array", (void *)rxq);
goto error;
}
-   /* For each WR (packet). */
for (i = 0; (i != elts_n); ++i) {
-   struct rxq_elt *elt = &(*elts)[i];
-   struct ibv_recv_wr *wr = &elt->wr;
-   struct ibv_sge *sge = &(*elts)[i].sge;
+   volatile struct mlx4_wqe_data_seg *scat = &(*rxq->wqes)[i];
struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
if (buf == NULL) {
@@ -98,37 +96,32 @@
ERROR("%p: empty mbuf pool", (void *)rxq);
goto error;
}
-   elt->buf = buf;
-   wr->next = &(*elts)[(i + 1)].wr;
-   wr->sg_list = sge;
-   wr->num_sge = 1;
/* Headroom is reserved by rte_pktmbuf_alloc(). */
assert(buf->data_off == RTE_PKTMBUF_HEADROOM);
/* Buffer is supposed to be empty. */
assert(rte_pktmbuf_data_len(buf) == 0);
assert(rte_pktmbuf_pkt_len(buf) == 0);
-   /* sge->addr must be able to store a pointer. */
-   assert(sizeof(sge->addr) >= sizeof(uintptr_t));
-   /* SGE keeps its headroom. */
-   sge->addr = (uintptr_t)
-   ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
-   sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
-   sge->lkey = rxq->mr->lkey;
-   /* Redundant check for tailroom. */
-   assert(sge->length == rte_pktmbuf_tailroom(buf));
+   buf->port = rxq->port_id;
+   buf->data_len = rte_pktmbuf_tailroom(buf);
+   buf->pkt_len = rte_pktmbuf_tailroom(buf);
+   buf->nb_segs = 1;
+   *scat = (struct mlx4_wqe_data_seg){
+   .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+ uintptr_t)),
+   .byte_count = rte_cpu_to_be_32(buf->data_len),
+   .lkey = rte_cpu_to_be_32(rxq->mr->lkey),
+   };
+   (*elts)[i] = buf;
}
-   /* The last WR pointer must be NULL. */
-   (*elts)[(i - 1)].wr.next = NULL;
DEBUG("%p: allocated and configured %u single-segment WRs",
  (void *)rxq, elts_n);
-   rxq->elts_n = elts_n;
-   rxq->elts_head = 0;
+   rxq->elts_n = log2above(elts_n);
rxq->elts = elts;
return 0;
 error:
if (elts != NULL) {
for (i = 0; (i != RTE_DIM(*elts)); ++i)
-   rte_pktmbuf_free_seg((*elts)[i].buf);
+   rte_pktmbuf_free_seg((*rxq->elts)[i]);
rte_free(elts);
}
DEBUG("%p: failed, freed everything", (void *)rxq);
@@ -146,17 +139,16 @@
 mlx4_rxq_free_elts(struct rxq *rxq)
 {
unsigned int i;
-   unsigned int elts_n = rxq->elts_n;
-   struct rxq_elt (*elts)[elts_n] = rxq->elts;
 
-   DEBUG("%p: freeing WRs", (void *)rxq);
+   if (rxq->elts == NULL)
+   return;
+   DEBUG("%p: freeing Rx queue elements", (void *)rxq);
+   for (i = 0; i != (1u << rxq->elts_n); ++i)
+   if ((*rxq->elts)[i] != NULL)
+   rte_pktmbuf_free_seg((*rxq->elts)[i]);
+   rte_free(rxq->elts);
rxq->elts_n = 0;
rxq->elts = NULL;
-   if (elts == NULL)
-   return;
-   for (i = 0; (i != RTE_DIM(*elts)); ++i)
-   rte_pktmbuf_free_seg((*elts)[i].buf);
-   rte_free(elts);
 }

[dpdk-dev] [PATCH v4 1/7] net/mlx4: add simple Tx bypassing Verbs

2017-10-05 Thread Ophir Munk
From: Moti Haimovsky 

Modify PMD to send single-buffer packets directly to the device bypassing
the Verbs Tx post and poll routines.

Signed-off-by: Moti Haimovsky 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx4/mlx4_prm.h  | 120 +++
 drivers/net/mlx4/mlx4_rxtx.c | 337 ---
 drivers/net/mlx4/mlx4_rxtx.h |  28 ++--
 drivers/net/mlx4/mlx4_txq.c  |  51 +++
 mk/rte.app.mk|   2 +-
 5 files changed, 436 insertions(+), 102 deletions(-)
 create mode 100644 drivers/net/mlx4/mlx4_prm.h

diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
new file mode 100644
index 000..085a595
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -0,0 +1,120 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2017 6WIND S.A.
+ *   Copyright 2017 Mellanox
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MLX4_PRM_H_
+#define MLX4_PRM_H_
+
+#include 
+#include 
+#include 
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include 
+#include 
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/* ConnectX-3 Tx queue basic block. */
+#define MLX4_TXBB_SHIFT 6
+#define MLX4_TXBB_SIZE (1 << MLX4_TXBB_SHIFT)
+
+/* Typical TSO descriptor with 16 gather entries is 352 bytes. */
+#define MLX4_MAX_WQE_SIZE 512
+#define MLX4_MAX_WQE_TXBBS (MLX4_MAX_WQE_SIZE / MLX4_TXBB_SIZE)
+
+/* Send queue stamping/invalidating information. */
+#define MLX4_SQ_STAMP_STRIDE 64
+#define MLX4_SQ_STAMP_DWORDS (MLX4_SQ_STAMP_STRIDE / 4)
+#define MLX4_SQ_STAMP_SHIFT 31
+#define MLX4_SQ_STAMP_VAL 0x7fff
+
+/* Work queue element (WQE) flags. */
+#define MLX4_BIT_WQE_OWN 0x8000
+
+#define MLX4_SIZE_TO_TXBBS(size) \
+   (RTE_ALIGN((size), (MLX4_TXBB_SIZE)) >> (MLX4_TXBB_SHIFT))
+
+/* Send queue information. */
+struct mlx4_sq {
+   uint8_t *buf; /**< SQ buffer. */
+   uint8_t *eob; /**< End of SQ buffer */
+   uint32_t head; /**< SQ head counter in units of TXBBS. */
+   uint32_t tail; /**< SQ tail counter in units of TXBBS. */
+   uint32_t txbb_cnt; /**< Num of WQEBB in the Q (should be ^2). */
+   uint32_t txbb_cnt_mask; /**< txbbs_cnt mask (txbb_cnt is ^2). */
+   uint32_t headroom_txbbs; /**< Num of txbbs that should be kept free. */
+   uint32_t *db; /**< Pointer to the doorbell. */
+   uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
+};
+
+#define mlx4_get_send_wqe(sq, n) ((sq)->buf + ((n) * (MLX4_TXBB_SIZE)))
+
+/* Completion queue information. */
+struct mlx4_cq {
+   uint8_t *buf; /**< Pointer to the completion queue buffer. */
+   uint32_t cqe_cnt; /**< Number of entries in the queue. */
+   uint32_t cqe_64:1; /**< CQ entry size is 64 bytes. */
+   uint32_t cons_index; /**< Last queue entry that was handled. */
+   uint32_t *set_ci_db; /**< Pointer to the completion queue doorbell. */
+};
+
+/**
+ * Retrieve a CQE entry from a CQ.
+ *
+ * cqe = cq->buf + cons_index * cqe_size + cqe_offset
+ *
+ * Where cqe_size is 32 or 64 bytes and cqe_offset is 0 or 32 (depending on
+ * cqe_size).
+ *
+ * @param cq
+ *   CQ to retrieve entry from.
+ * @param index
+ *   Entry index.
+ *
+ * @return
+ *   Pointer to CQE entry.
+ */
+static inline struct mlx4_cqe *
+mlx4_get_cqe(struct mlx4_cq *cq, uint32_t index)
+{
+   return (struct mlx4_cqe *)(cq->buf +
+

[dpdk-dev] [PATCH v4 3/7] net/mlx4: restore Rx scatter support

2017-10-05 Thread Ophir Munk
Calculate the number of scatters on the fly according to
the maximum expected packet size.

Signed-off-by: Vasily Philipov 
Signed-off-by: Ophir Munk 
---
 drivers/net/mlx4/mlx4_rxq.c  | 64 +---
 drivers/net/mlx4/mlx4_rxtx.c | 11 +---
 drivers/net/mlx4/mlx4_rxtx.h |  1 +
 3 files changed, 62 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 9b98d86..44d095d 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -78,6 +78,7 @@
 mlx4_rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n)
 {
unsigned int i;
+   const uint32_t sges_n = 1 << rxq->sges_n;
struct rte_mbuf *(*elts)[elts_n] =
rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, rxq->socket);
 
@@ -101,6 +102,9 @@
/* Buffer is supposed to be empty. */
assert(rte_pktmbuf_data_len(buf) == 0);
assert(rte_pktmbuf_pkt_len(buf) == 0);
+   /* Only the first segment keeps headroom. */
+   if (i % sges_n)
+   buf->data_off = 0;
buf->port = rxq->port_id;
buf->data_len = rte_pktmbuf_tailroom(buf);
buf->pkt_len = rte_pktmbuf_tailroom(buf);
@@ -113,8 +117,8 @@
};
(*elts)[i] = buf;
}
-   DEBUG("%p: allocated and configured %u single-segment WRs",
- (void *)rxq, elts_n);
+   DEBUG("%p: allocated and configured %u segments (max %u packets)",
+ (void *)rxq, elts_n, elts_n >> rxq->sges_n);
rxq->elts_n = log2above(elts_n);
rxq->elts = elts;
return 0;
@@ -185,12 +189,15 @@
  *   Completion queue to associate with QP.
  * @param desc
  *   Number of descriptors in QP (hint only).
+ * @param sges_n
+ *   Maximum number of segments per packet.
  *
  * @return
  *   QP pointer or NULL in case of error and rte_errno is set.
  */
 static struct ibv_qp *
-mlx4_rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc)
+mlx4_rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
+ uint32_t sges_n)
 {
struct ibv_qp *qp;
struct ibv_qp_init_attr attr = {
@@ -204,7 +211,7 @@
priv->device_attr.max_qp_wr :
desc),
/* Maximum number of segments per packet. */
-   .max_recv_sge = 1,
+   .max_recv_sge = sges_n,
},
.qp_type = IBV_QPT_RAW_PACKET,
};
@@ -263,11 +270,31 @@
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
(mb_len - RTE_PKTMBUF_HEADROOM)) {
-   ;
+   tmpl.sges_n = 0;
} else if (dev->data->dev_conf.rxmode.enable_scatter) {
-   WARN("%p: scattered mode has been requested but is"
-" not supported, this may lead to packet loss",
-(void *)dev);
+   uint32_t size =
+   RTE_PKTMBUF_HEADROOM +
+   dev->data->dev_conf.rxmode.max_rx_pkt_len;
+   uint32_t sges_n;
+
+   /*
+* Determine the number of SGEs needed for a full packet
+* and round it to the next power of two.
+*/
+   sges_n = log2above((size / mb_len) + !!(size % mb_len));
+   tmpl.sges_n = sges_n;
+   /* Make sure sges_n did not overflow. */
+   size = mb_len * (1 << tmpl.sges_n);
+   size -= RTE_PKTMBUF_HEADROOM;
+   if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+   rte_errno = EOVERFLOW;
+   ERROR("%p: too many SGEs (%u) needed to handle"
+ " requested maximum packet size %u",
+ (void *)dev,
+ 1 << sges_n,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len);
+   goto error;
+   }
} else {
WARN("%p: the requested maximum Rx packet size (%u) is"
 " larger than a single mbuf (%u) and scattered"
@@ -276,6 +303,17 @@
 dev->data->dev_conf.rxmode.max_rx_pkt_len,
 mb_len - RTE_PKTMBUF_HEADROOM);
}
+   DEBUG("%p: maximum number of segments per packet: %u",
+ (void *)dev, 1 << tmpl.sges_n);
+   if (desc % (1 << tmpl.sges_n)) {
+   rte_errno = EINVAL;
+   ERROR("%p: number of RX queue descriptors (%u) is not a"
+ " multiple of maximum segments per packet (%u)",
+ (void *)dev,
+ desc,
+ 1 << tmpl.sges_n);
+   goto error;
+   }
/* Use the entire Rx mempool as the me

[dpdk-dev] [PATCH v4 0/7] new mlx4 datapath bypassing ibverbs

2017-10-05 Thread Ophir Munk
v4 (Ophir):
- Split "net/mlx4: restore Rx scatter support" commit from "net/mlx4: 
  restore full Rx support bypassing Verbs" commit

v3 (Adrien):
- Drop a few unrelated or unnecessary changes such as the removal of
  MLX4_PMD_TX_MP_CACHE.
- Move device checksum support detection code to its previous location.
- Fix include guard in mlx4_prm.h.
- Reorder #includes alphabetically.
- Replace MLX4_TRANSPOSE() macro with documented inline function.
- Remove extra spaces and blank lines.
- Use uint8_t * instead of char * for buffers.
- Replace mlx4_get_cqe() macro with a documented inline function.
- Replace several unsigned int with uint32_t.
- Add consistency to field names (sge_n => sges_n).
- Make mbuf size checks in RX queue setup function similar to mlx5.
- Update various comments.
- Fix indentation.
- Replace run-time endian conversion with static ones where possible.
- Reorder fields in struct rxq and struct txq for consistency, remove
  one level of unnecessary inner structures.
- Fix memory leak on Tx bounce buffer.
- Update commit logs.
- Fix remaining checkpatch warnings.

v2 (Matan):
Rearrange patches.
Semantics.
Enhancements.
Fix compilation issues.

Moti Haimovsky (6):
  net/mlx4: add simple Tx bypassing Verbs
  net/mlx4: restore full Rx support bypassing Verbs
  net/mlx4: restore Tx gather support
  net/mlx4: restore Tx checksum offloads
  net/mlx4: restore Rx offloads
  net/mlx4: add loopback Tx from VF

Ophir Munk (1):
  net/mlx4: restore Rx scatter support

 drivers/net/mlx4/mlx4.c|  11 +
 drivers/net/mlx4/mlx4.h|   2 +
 drivers/net/mlx4/mlx4_ethdev.c |  10 +
 drivers/net/mlx4/mlx4_prm.h| 152 
 drivers/net/mlx4/mlx4_rxq.c| 179 ++
 drivers/net/mlx4/mlx4_rxtx.c   | 768 ++---
 drivers/net/mlx4/mlx4_rxtx.h   |  54 +--
 drivers/net/mlx4/mlx4_txq.c|  67 +++-
 drivers/net/mlx4/mlx4_utils.h  |  20 ++
 mk/rte.app.mk  |   2 +-
 10 files changed, 975 insertions(+), 290 deletions(-)
 create mode 100644 drivers/net/mlx4/mlx4_prm.h

-- 
1.8.3.1




[dpdk-dev] [PATCH v4 4/7] net/mlx4: restore Tx gather support

2017-10-05 Thread Ophir Munk
From: Moti Haimovsky 

This patch adds support for transmitting packets spanning over multiple
buffers.

In this patch we also take into consideration the amount of entries a
packet occupies in the TxQ when setting the report-completion flag of the
chip.

Signed-off-by: Moti Haimovsky 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx4/mlx4_rxtx.c | 197 +--
 drivers/net/mlx4/mlx4_rxtx.h |   6 +-
 drivers/net/mlx4/mlx4_txq.c  |  12 ++-
 3 files changed, 127 insertions(+), 88 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index fd8ef7b..cc0baaa 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -63,6 +63,15 @@
 #include "mlx4_utils.h"
 
 /**
+ * Pointer-value pair structure used in tx_post_send for saving the first
+ * DWORD (32 byte) of a TXBB.
+ */
+struct pv {
+   struct mlx4_wqe_data_seg *dseg;
+   uint32_t val;
+};
+
+/**
  * Stamp a WQE so it won't be reused by the HW.
  *
  * Routine is used when freeing WQE used by the chip or when failing
@@ -291,24 +300,28 @@
  *   Target Tx queue.
  * @param pkt
  *   Packet to transmit.
- * @param send_flags
- *   @p MLX4_WQE_CTRL_CQ_UPDATE to request completion on this packet.
  *
  * @return
  *   0 on success, negative errno value otherwise and rte_errno is set.
  */
 static inline int
-mlx4_post_send(struct txq *txq, struct rte_mbuf *pkt, uint32_t send_flags)
+mlx4_post_send(struct txq *txq, struct rte_mbuf *pkt)
 {
struct mlx4_wqe_ctrl_seg *ctrl;
struct mlx4_wqe_data_seg *dseg;
struct mlx4_sq *sq = &txq->msq;
+   struct rte_mbuf *buf;
uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
uint32_t lkey;
uintptr_t addr;
+   uint32_t srcrb_flags;
+   uint32_t owner_opcode = MLX4_OPCODE_SEND;
+   uint32_t byte_count;
int wqe_real_size;
int nr_txbbs;
int rc;
+   struct pv *pv = (struct pv *)txq->bounce_buf;
+   int pv_counter = 0;
 
/* Calculate the needed work queue entry size for this packet. */
wqe_real_size = sizeof(struct mlx4_wqe_ctrl_seg) +
@@ -324,56 +337,81 @@
rc = ENOSPC;
goto err;
}
-   /* Get the control and single-data entries of the WQE. */
+   /* Get the control and data entries of the WQE. */
ctrl = (struct mlx4_wqe_ctrl_seg *)mlx4_get_send_wqe(sq, head_idx);
dseg = (struct mlx4_wqe_data_seg *)((uintptr_t)ctrl +
sizeof(struct mlx4_wqe_ctrl_seg));
-   /* Fill the data segment with buffer information. */
-   addr = rte_pktmbuf_mtod(pkt, uintptr_t);
-   rte_prefetch0((volatile void *)addr);
-   dseg->addr = rte_cpu_to_be_64(addr);
-   /* Memory region key for this memory pool. */
-   lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(pkt));
-   if (unlikely(lkey == (uint32_t)-1)) {
-   /* MR does not exist. */
-   DEBUG("%p: unable to get MP <-> MR association", (void *)txq);
+   /* Fill the data segments with buffer information. */
+   for (buf = pkt; buf != NULL; buf = buf->next, dseg++) {
+   addr = rte_pktmbuf_mtod(buf, uintptr_t);
+   rte_prefetch0((volatile void *)addr);
+   /* Handle WQE wraparound. */
+   if (unlikely(dseg >= (struct mlx4_wqe_data_seg *)sq->eob))
+   dseg = (struct mlx4_wqe_data_seg *)sq->buf;
+   dseg->addr = rte_cpu_to_be_64(addr);
+   /* Memory region key for this memory pool. */
+   lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(buf));
+   if (unlikely(lkey == (uint32_t)-1)) {
+   /* MR does not exist. */
+   DEBUG("%p: unable to get MP <-> MR association",
+ (void *)txq);
+   /*
+* Restamp entry in case of failure.
+* Make sure that size is written correctly
+* Note that we give ownership to the SW, not the HW.
+*/
+   ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
+   mlx4_txq_stamp_freed_wqe(sq, head_idx,
+(sq->head & sq->txbb_cnt) ? 0 : 1);
+   rc = EFAULT;
+   goto err;
+   }
+   dseg->lkey = rte_cpu_to_be_32(lkey);
+   if (likely(buf->data_len)) {
+   byte_count = rte_cpu_to_be_32(buf->data_len);
+   } else {
+   /*
+* Zero length segment is treated as inline segment
+* with zero data.
+*/
+   byte_count = RTE_BE32(0x8000);
+   }
/*
-* Restamp entry in case of failure, make sure that size is
-* wr

[dpdk-dev] [PATCH v4 7/7] net/mlx4: add loopback Tx from VF

2017-10-05 Thread Ophir Munk
From: Moti Haimovsky 

This patch adds loopback functionality used when the chip is a VF in order
to enable packet transmission between VFs and PF.

Signed-off-by: Moti Haimovsky 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx4/mlx4_rxtx.c | 33 +
 drivers/net/mlx4/mlx4_rxtx.h |  1 +
 drivers/net/mlx4/mlx4_txq.c  |  2 ++
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 87c5261..36173ad 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -311,10 +311,13 @@ struct pv {
struct mlx4_wqe_data_seg *dseg;
struct mlx4_sq *sq = &txq->msq;
struct rte_mbuf *buf;
+   union {
+   uint32_t flags;
+   uint16_t flags16[2];
+   } srcrb;
uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
uint32_t lkey;
uintptr_t addr;
-   uint32_t srcrb_flags;
uint32_t owner_opcode = MLX4_OPCODE_SEND;
uint32_t byte_count;
int wqe_real_size;
@@ -414,22 +417,16 @@ struct pv {
/* Fill the control parameters for this packet. */
ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
/*
-* The caller should prepare "imm" in advance in order to support
-* VF to VF communication (when the device is a virtual-function
-* device (VF)).
-*/
-   ctrl->imm = 0;
-   /*
 * For raw Ethernet, the SOLICIT flag is used to indicate that no ICRC
 * should be calculated.
 */
txq->elts_comp_cd -= nr_txbbs;
if (unlikely(txq->elts_comp_cd <= 0)) {
txq->elts_comp_cd = txq->elts_comp_cd_init;
-   srcrb_flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT |
+   srcrb.flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT |
   MLX4_WQE_CTRL_CQ_UPDATE);
} else {
-   srcrb_flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT);
+   srcrb.flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT);
}
/* Enable HW checksum offload if requested */
if (txq->csum &&
@@ -443,14 +440,26 @@ struct pv {
owner_opcode |= MLX4_WQE_CTRL_IIP_HDR_CSUM |
MLX4_WQE_CTRL_IL4_HDR_CSUM;
if (pkt->ol_flags & PKT_TX_OUTER_IP_CKSUM)
-   srcrb_flags |=
+   srcrb.flags |=
RTE_BE32(MLX4_WQE_CTRL_IP_HDR_CSUM);
} else {
-   srcrb_flags |= RTE_BE32(MLX4_WQE_CTRL_IP_HDR_CSUM |
+   srcrb.flags |= RTE_BE32(MLX4_WQE_CTRL_IP_HDR_CSUM |
MLX4_WQE_CTRL_TCP_UDP_CSUM);
}
}
-   ctrl->srcrb_flags = srcrb_flags;
+   if (txq->lb) {
+   /*
+* Copy destination MAC address to the WQE, this allows
+* loopback in eSwitch, so that VFs and PF can communicate
+* with each other.
+*/
+   srcrb.flags16[0] = *(rte_pktmbuf_mtod(pkt, uint16_t *));
+   ctrl->imm = *(rte_pktmbuf_mtod_offset(pkt, uint32_t *,
+ sizeof(uint16_t)));
+   } else {
+   ctrl->imm = 0;
+   }
+   ctrl->srcrb_flags = srcrb.flags;
/*
 * Make sure descriptor is fully written before
 * setting ownership bit (because HW can start
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 6aad41a..37f31f4 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -112,6 +112,7 @@ struct txq {
uint32_t max_inline; /**< Max inline send size. */
uint32_t csum:1; /**< Enable checksum offloading. */
uint32_t csum_l2tun:1; /**< Same for L2 tunnels. */
+   uint32_t lb:1; /**< Whether packets should be looped back by eSwitch. */
uint8_t *bounce_buf;
/**< Memory used for storing the first DWORD of data TXBBs. */
struct {
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 96429bc..9d1be95 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -412,6 +412,8 @@ struct txq_mp2mr_mbuf_check_data {
  (void *)dev, strerror(rte_errno));
goto error;
}
+   /* Enable Tx loopback for VF devices. */
+   tmpl.lb = !!(priv->vf);
/* Clean up txq in case we're reinitializing it. */
DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
mlx4_txq_cleanup(txq);
-- 
1.8.3.1



[dpdk-dev] [PATCH v4 5/7] net/mlx4: restore Tx checksum offloads

2017-10-05 Thread Ophir Munk
From: Moti Haimovsky 

This patch adds hardware offloading support for IPv4, UDP and TCP checksum
calculation, including inner/outer checksums on supported tunnel types.

Signed-off-by: Moti Haimovsky 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx4/mlx4.c| 11 +++
 drivers/net/mlx4/mlx4.h|  2 ++
 drivers/net/mlx4/mlx4_ethdev.c |  6 ++
 drivers/net/mlx4/mlx4_prm.h|  2 ++
 drivers/net/mlx4/mlx4_rxtx.c   | 19 +++
 drivers/net/mlx4/mlx4_rxtx.h   |  2 ++
 drivers/net/mlx4/mlx4_txq.c|  2 ++
 7 files changed, 44 insertions(+)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index b084903..385ddaa 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -529,6 +529,17 @@ struct mlx4_conf {
priv->pd = pd;
priv->mtu = ETHER_MTU;
priv->vf = vf;
+   priv->hw_csum = !!(device_attr.device_cap_flags &
+  IBV_DEVICE_RAW_IP_CSUM);
+   DEBUG("checksum offloading is %ssupported",
+ (priv->hw_csum ? "" : "not "));
+   /* Only ConnectX-3 Pro supports tunneling. */
+   priv->hw_csum_l2tun =
+   priv->hw_csum &&
+   (device_attr.vendor_part_id ==
+PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO);
+   DEBUG("L2 tunnel checksum offloads are %ssupported",
+ (priv->hw_csum_l2tun ? "" : "not "));
/* Configure the first MAC address by default. */
if (mlx4_get_mac(priv, &mac.addr_bytes)) {
ERROR("cannot get MAC address, is mlx4_en loaded?"
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 93e5502..0b71867 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -104,6 +104,8 @@ struct priv {
unsigned int vf:1; /* This is a VF device. */
unsigned int intr_alarm:1; /* An interrupt alarm is scheduled. */
unsigned int isolated:1; /* Toggle isolated mode. */
+   unsigned int hw_csum:1; /* Checksum offload is supported. */
+   unsigned int hw_csum_l2tun:1; /* Checksum support for L2 tunnels. */
struct rte_intr_handle intr_handle; /* Port interrupt handle. */
struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
LIST_HEAD(mlx4_flows, rte_flow) flows;
diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c
index a9e8059..bec1787 100644
--- a/drivers/net/mlx4/mlx4_ethdev.c
+++ b/drivers/net/mlx4/mlx4_ethdev.c
@@ -553,6 +553,12 @@
info->max_mac_addrs = 1;
info->rx_offload_capa = 0;
info->tx_offload_capa = 0;
+   if (priv->hw_csum)
+   info->tx_offload_capa |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
+ DEV_TX_OFFLOAD_UDP_CKSUM |
+ DEV_TX_OFFLOAD_TCP_CKSUM);
+   if (priv->hw_csum_l2tun)
+   info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
if (mlx4_get_ifname(priv, &ifname) == 0)
info->if_index = if_nametoindex(ifname);
info->speed_capa =
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index 085a595..df5a6b4 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -64,6 +64,8 @@
 
 /* Work queue element (WQE) flags. */
 #define MLX4_BIT_WQE_OWN 0x8000
+#define MLX4_WQE_CTRL_IIP_HDR_CSUM (1 << 28)
+#define MLX4_WQE_CTRL_IL4_HDR_CSUM (1 << 27)
 
 #define MLX4_SIZE_TO_TXBBS(size) \
(RTE_ALIGN((size), (MLX4_TXBB_SIZE)) >> (MLX4_TXBB_SHIFT))
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index cc0baaa..fe7d5d0 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -431,6 +431,25 @@ struct pv {
} else {
srcrb_flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT);
}
+   /* Enable HW checksum offload if requested */
+   if (txq->csum &&
+   (pkt->ol_flags &
+(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))) {
+   const uint64_t is_tunneled = (pkt->ol_flags &
+ (PKT_TX_TUNNEL_GRE |
+  PKT_TX_TUNNEL_VXLAN));
+
+   if (is_tunneled && txq->csum_l2tun) {
+   owner_opcode |= MLX4_WQE_CTRL_IIP_HDR_CSUM |
+   MLX4_WQE_CTRL_IL4_HDR_CSUM;
+   if (pkt->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+   srcrb_flags |=
+   RTE_BE32(MLX4_WQE_CTRL_IP_HDR_CSUM);
+   } else {
+   srcrb_flags |= RTE_BE32(MLX4_WQE_CTRL_IP_HDR_CSUM |
+   MLX4_WQE_CTRL_TCP_UDP_CSUM);
+   }
+   }
ctrl->srcrb_flags = srcrb_flags;
/*
 * Make sure desc

[dpdk-dev] [PATCH v4 6/7] net/mlx4: restore Rx offloads

2017-10-05 Thread Ophir Munk
From: Moti Haimovsky 

This patch adds hardware offloading support for IPV4, UDP and TCP checksum
verification, including inner/outer checksums on supported tunnel types.

It also restores packet type recognition support.

Signed-off-by: Vasily Philipov 
Signed-off-by: Moti Haimovsky 
Acked-by: Adrien Mazarguil 
---
 drivers/net/mlx4/mlx4_ethdev.c |   6 ++-
 drivers/net/mlx4/mlx4_prm.h|  30 +++
 drivers/net/mlx4/mlx4_rxq.c|   5 ++
 drivers/net/mlx4/mlx4_rxtx.c   | 118 -
 drivers/net/mlx4/mlx4_rxtx.h   |   2 +
 5 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c
index bec1787..6dbf273 100644
--- a/drivers/net/mlx4/mlx4_ethdev.c
+++ b/drivers/net/mlx4/mlx4_ethdev.c
@@ -553,10 +553,14 @@
info->max_mac_addrs = 1;
info->rx_offload_capa = 0;
info->tx_offload_capa = 0;
-   if (priv->hw_csum)
+   if (priv->hw_csum) {
info->tx_offload_capa |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
  DEV_TX_OFFLOAD_UDP_CKSUM |
  DEV_TX_OFFLOAD_TCP_CKSUM);
+   info->rx_offload_capa |= (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM);
+   }
if (priv->hw_csum_l2tun)
info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
if (mlx4_get_ifname(priv, &ifname) == 0)
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index df5a6b4..0d76a73 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -70,6 +70,14 @@
 #define MLX4_SIZE_TO_TXBBS(size) \
(RTE_ALIGN((size), (MLX4_TXBB_SIZE)) >> (MLX4_TXBB_SHIFT))
 
+/* CQE checksum flags. */
+enum {
+   MLX4_CQE_L2_TUNNEL_IPV4 = (int)(1u << 25),
+   MLX4_CQE_L2_TUNNEL_L4_CSUM = (int)(1u << 26),
+   MLX4_CQE_L2_TUNNEL = (int)(1u << 27),
+   MLX4_CQE_L2_TUNNEL_IPOK = (int)(1u << 31),
+};
+
 /* Send queue information. */
 struct mlx4_sq {
uint8_t *buf; /**< SQ buffer. */
@@ -119,4 +127,26 @@ struct mlx4_cq {
   (cq->cqe_64 << 5));
 }
 
+/**
+ * Transpose a flag in a value.
+ *
+ * @param val
+ *   Input value.
+ * @param from
+ *   Flag to retrieve from input value.
+ * @param to
+ *   Flag to set in output value.
+ *
+ * @return
+ *   Output value with transposed flag enabled if present on input.
+ */
+static inline uint64_t
+mlx4_transpose(uint64_t val, uint64_t from, uint64_t to)
+{
+   return (from >= to ?
+   (val & from) / (from / to) :
+   (val & from) * (to / from));
+}
+
+
 #endif /* MLX4_PRM_H_ */
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 44d095d..a021a32 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -260,6 +260,11 @@
int ret;
 
(void)conf; /* Thresholds configuration (ignored). */
+   /* Toggle Rx checksum offload if hardware supports it. */
+   if (priv->hw_csum)
+   tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+   if (priv->hw_csum_l2tun)
+   tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
mb_len = rte_pktmbuf_data_room_size(mp);
if (desc == 0) {
rte_errno = EINVAL;
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index fe7d5d0..87c5261 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -557,6 +557,107 @@ struct pv {
 }
 
 /**
+ * Translate Rx completion flags to packet type.
+ *
+ * @param flags
+ *   Rx completion flags returned by mlx4_cqe_flags().
+ *
+ * @return
+ *   Packet type in mbuf format.
+ */
+static inline uint32_t
+rxq_cq_to_pkt_type(uint32_t flags)
+{
+   uint32_t pkt_type;
+
+   if (flags & MLX4_CQE_L2_TUNNEL)
+   pkt_type =
+   mlx4_transpose(flags,
+  MLX4_CQE_L2_TUNNEL_IPV4,
+  RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) |
+   mlx4_transpose(flags,
+  MLX4_CQE_STATUS_IPV4_PKT,
+  RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN);
+   else
+   pkt_type = mlx4_transpose(flags,
+ MLX4_CQE_STATUS_IPV4_PKT,
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN);
+   return pkt_type;
+}
+
+/**
+ * Translate Rx completion flags to offload flags.
+ *
+ * @param flags
+ *   Rx completion flags returned by mlx4_cqe_flags().
+ * @param csum
+ *   Whether Rx checksums are enabled.
+ * @param csum_l2tun
+ *   Whether Rx L2 tunnel checksums are enabled.
+ *
+ * @return
+ *   Offload flags (ol_flags) in mbuf format.
+ */
+static inline uint32_t
+rxq_cq_to_ol_flags(uin

Re: [dpdk-dev] [PATCH v2] test/crypto: remove crypto perf tests

2017-10-05 Thread Zhang, Roy Fan
> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Pablo de Lara
> Sent: Wednesday, October 4, 2017 7:49 AM
> To: Doherty, Declan ;
> hemant.agra...@nxp.com; akhil.go...@nxp.com;
> jerin.ja...@caviumnetworks.com; Trahe, Fiona ;
> Jain, Deepak K ; Griffin, John
> 
> Cc: dev@dpdk.org; De Lara Guarch, Pablo 
> Subject: [dpdk-dev] [PATCH v2] test/crypto: remove crypto perf tests
> 
> Since the crypto perf application is flexible enough
> to cover all the crypto performance tests, these are not needed
> anymore, so they will be removed to avoid duplications.
> Besides, the crypto perf application gives the user more options
> to get performance, for every single supported algorithm,
> such as varying the buffer size as the user wants.
> 
> Signed-off-by: Pablo de Lara 
> ---
> 
> Changes in v2:
> - Rebased against latest code
> - Added note in release notes
 
Acked-by: Fan Zhang 


Re: [dpdk-dev] [PATCH 10/10] app/test-crypto-perf: fix compilation with -Og

2017-10-05 Thread De Lara Guarch, Pablo


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Olivier Matz
> Sent: Monday, September 11, 2017 4:14 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 10/10] app/test-crypto-perf: fix compilation
> with -Og
> 
> The compilation with gcc-6.3.0 and EXTRA_CFLAGS=-Og gives the following
> error:
> 
>   CC cperf_test_verify.o
> cperf_test_verify.c: In function ‘cperf_verify_op’:
> cperf_test_verify.c:382:5: error: ‘auth’ may be used uninitialized
>in this function
>[-Werror=maybe-uninitialized]
>   if (auth == 1) {
>  ^
> cperf_test_verify.c:371:5: error: ‘cipher’ may be used uninitialized
>in this function
>  [-Werror=maybe-uninitialized]
>   if (cipher == 1) {
>  ^
> cperf_test_verify.c:384:11: error: ‘auth_offset’ may be used
>   uninitialized in this function
>   [-Werror=maybe-uninitialized]
> res += memcmp(data + auth_offset,
>^~
>   vector->digest.data,
>   
>   options->digest_sz);
>   ~~~
> cperf_test_verify.c:377:11: error: ‘cipher_offset’ may be used
> uninitialized in this function
> [-Werror=maybe-uninitialized]
> res += memcmp(data + cipher_offset,
>^~~~
>   vector->plaintext.data,
>   ~~~
>   options->test_buffer_size);
>   ~~
> 
> There is no default case in the switch statement, so if options->op_type is
> an unknown value, the function will use uninitialized values. Fix it by adding
> a default.
> 
> Fixes: f8be1786b1b8 ("app/crypto-perf: introduce performance test
> application")
> 
> Signed-off-by: Olivier Matz 

Acked-by: Pablo de Lara 

Before applying this patch and patch 9, title should be renamed to
"app/crypto-perf", since that's the convention that we are using.

Thanks,
Pablo


Re: [dpdk-dev] [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy

2017-10-05 Thread Ananyev, Konstantin
> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map 
> b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> index 8c08b8d..15a2fe9 100644
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -241,5 +241,6 @@ EXPERIMENTAL {
>   rte_service_runstate_set;
>   rte_service_set_stats_enable;
>   rte_service_start_with_defaults;
> + rte_memcpy_ptr;
> 
>  } DPDK_17.08;

I am not an expert in DPDK versioning system,
But shouldn't we create a 17.11 section here?
Also I think an alphabetical order should be preserved here.
Konstantin


Re: [dpdk-dev] [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy

2017-10-05 Thread Ananyev, Konstantin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Ananyev, Konstantin
> Sent: Thursday, October 5, 2017 10:37 AM
> To: Li, Xiaoyun ; Richardson, Bruce 
> 
> Cc: Lu, Wenzhuo ; Zhang, Helin ; 
> dev@dpdk.org; Thomas Monjalon
> (thomas.monja...@6wind.com) 
> Subject: Re: [dpdk-dev] [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy
> 
> > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map 
> > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > index 8c08b8d..15a2fe9 100644
> > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > @@ -241,5 +241,6 @@ EXPERIMENTAL {
> > rte_service_runstate_set;
> > rte_service_set_stats_enable;
> > rte_service_start_with_defaults;
> > +   rte_memcpy_ptr;
> >
> >  } DPDK_17.08;
> 
> I am not an expert in DPDK versioning system,
> But shouldn't we create a 17.11 section here?
> Also I think an alphabetical order should be preserved here.
> Konstantin


Re: [dpdk-dev] [PATCH v6 3/3] efd: run-time dispatch over x86 EFD functions

2017-10-05 Thread Ananyev, Konstantin


> +efd_value_t
> +efd_lookup_internal_avx2(const efd_hashfunc_t *group_hash_idx,
> + const efd_lookuptbl_t *group_lookup_table,
> + const uint32_t hash_val_a, const uint32_t hash_val_b)
> +{
> +#ifdef CC_SUPPORT_AVX2
> + efd_value_t value = 0;
> + uint32_t i = 0;
> + __m256i vhash_val_a = _mm256_set1_epi32(hash_val_a);
> + __m256i vhash_val_b = _mm256_set1_epi32(hash_val_b);
> +
> + for (; i < RTE_EFD_VALUE_NUM_BITS; i += 8) {
> + __m256i vhash_idx =
> + _mm256_cvtepu16_epi32(EFD_LOAD_SI128(
> + (__m128i const *) &group_hash_idx[i]));
> + __m256i vlookup_table = _mm256_cvtepu16_epi32(
> + EFD_LOAD_SI128((__m128i const *)
> + &group_lookup_table[i]));
> + __m256i vhash = _mm256_add_epi32(vhash_val_a,
> + _mm256_mullo_epi32(vhash_idx, vhash_val_b));
> + __m256i vbucket_idx = _mm256_srli_epi32(vhash,
> + EFD_LOOKUPTBL_SHIFT);
> + __m256i vresult = _mm256_srlv_epi32(vlookup_table,
> + vbucket_idx);
> +
> + value |= (_mm256_movemask_ps(
> + (__m256) _mm256_slli_epi32(vresult, 31))
> + & ((1 << (RTE_EFD_VALUE_NUM_BITS - i)) - 1)) << i;
> + }
> +
> + return value;
> +#else
> + RTE_SET_USED(group_hash_idx);
> + RTE_SET_USED(group_lookup_table);
> + RTE_SET_USED(hash_val_a);
> + RTE_SET_USED(hash_val_b);
> + /* Return dummy value, only to avoid compilation breakage */
> + return 0;
> +#endif
> +
> +}

#ifdef CC_SUPPORT_AVX2 is still there.
Will wait for v7 I guess.
Konstantin


[dpdk-dev] [PATCH v1 0/7] Flow API helpers enhancements

2017-10-05 Thread Adrien Mazarguil
This series brings enhancements to various rte_flow helpers:

- Allow applications to use rte_flow_error_set() by making it part of the
  public interface and documenting it as such.

- Address rte_flow_copy()'s limitations by replacing it with the more
  versatile rte_flow_conv(). This new function allows retrieving other
  properties such as item/action names, enabling testpmd to finally use it
  and get rid of duplicated code.

- Add a script (gen-rte_flow_conv-h.sh) to help with generating the
  resources used by rte_flow_conv(). Developers should run it when adding or
  modifying pattern items or actions (done as part of this series to add the
  missing "fuzzy" pattern item).

- Future plans for rte_flow_conv() include translating error codes to
  human-readable messages, so applications do not have to make their own.

All these changes address concerns raised a couple of months ago [1]. Work
on these patches actually started at the time but I was unable to complete
and clean them up until recently.

[1] http://dpdk.org/ml/archives/dev/2017-July/070492.html

Adrien Mazarguil (7):
  ethdev: expose flow API error helper
  ethdev: replace flow API object copy function
  ethdev: add flow API item/action name conversion
  app/testpmd: rely on flow API conversion function
  ethdev: enhance flow API item/action descriptions
  ethdev: generate flow API conversion header
  ethdev: update flow API conversion header

 MAINTAINERS |   1 +
 app/test-pmd/config.c   | 293 ---
 app/test-pmd/testpmd.h  |   7 +-
 buildtools/gen-rte_flow_conv-h.sh   | 264 +
 doc/guides/prog_guide/rte_flow.rst  |  43 ++-
 drivers/net/failsafe/failsafe_ether.c   |   6 +-
 drivers/net/failsafe/failsafe_flow.c|  29 +-
 drivers/net/failsafe/failsafe_private.h |   4 +-
 drivers/net/mlx4/mlx4_flow.c|   6 +-
 drivers/net/tap/tap_flow.c  |   2 +-
 lib/librte_ether/Makefile   |  10 +
 lib/librte_ether/rte_ethdev_version.map |   1 +
 lib/librte_ether/rte_flow.c | 539 ---
 lib/librte_ether/rte_flow.h | 275 --
 lib/librte_ether/rte_flow_conv.h| 345 +
 lib/librte_ether/rte_flow_driver.h  |  38 --
 16 files changed, 1320 insertions(+), 543 deletions(-)
 create mode 100755 buildtools/gen-rte_flow_conv-h.sh
 create mode 100644 lib/librte_ether/rte_flow_conv.h

-- 
2.1.4



[dpdk-dev] [PATCH v1 2/7] ethdev: replace flow API object copy function

2017-10-05 Thread Adrien Mazarguil
rte_flow_copy() is bound to duplicate flow rule descriptions (attributes,
pattern and list of actions, all at once), however applications sometimes
need more freedom, for instance the ability to duplicate only one of the
underlying objects (a single pattern item or action) or retrieve other
properties such as their names.

Instead of adding dedicated functions to handle each possible use case,
this patch introduces rte_flow_conv(). This function supports any number of
object conversion operations in an extensible manner.

While rte_flow_copy() could be re-implemented through rte_flow_conv(), it
is removed without prior notice; it is assumed that this function has no
users besides the fail-safe PMD, therefore no effort is made to maintain
it.

Signed-off-by: Adrien Mazarguil 
---
 doc/guides/prog_guide/rte_flow.rst  |  19 ++
 drivers/net/failsafe/failsafe_ether.c   |   6 +-
 drivers/net/failsafe/failsafe_flow.c|  29 ++-
 drivers/net/failsafe/failsafe_private.h |   4 +-
 lib/librte_ether/rte_ethdev_version.map |   1 +
 lib/librte_ether/rte_flow.c | 296 ++-
 lib/librte_ether/rte_flow.h | 193 ++---
 7 files changed, 404 insertions(+), 144 deletions(-)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 565a809..5026730 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1697,6 +1697,25 @@ This function initializes ``error`` (if non-NULL) with 
the provided
 parameters and sets ``rte_errno`` to ``code``. A negative error ``code`` is
 then returned.
 
+Object conversion
+~
+
+.. code-block:: c
+
+   int
+   rte_flow_conv(enum rte_flow_conv_op op,
+ void *dst,
+ size_t size,
+ const void *src,
+ struct rte_flow_error *error);
+
+Convert ``src`` to ``dst`` according to operation ``op``. Possible
+operations include:
+
+- Attributes, pattern item or action duplication.
+- Duplication of an entire pattern or list of actions.
+- Duplication of a complete flow rule description.
+
 Caveats
 ---
 
diff --git a/drivers/net/failsafe/failsafe_ether.c 
b/drivers/net/failsafe/failsafe_ether.c
index 0c0748f..80f391a 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -257,9 +257,9 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
DEBUG("Creating flow #%" PRIu32, i++);
flow->flows[SUB_ID(sdev)] =
rte_flow_create(PORT_ID(sdev),
-   &flow->fd->attr,
-   flow->fd->items,
-   flow->fd->actions,
+   flow->fd.attr,
+   flow->fd.pattern,
+   flow->fd.actions,
&ferror);
ret = rte_errno;
if (ret)
diff --git a/drivers/net/failsafe/failsafe_flow.c 
b/drivers/net/failsafe/failsafe_flow.c
index 153ceee..a568a8b 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -31,8 +31,11 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include 
+#include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -46,19 +49,31 @@ fs_flow_allocate(const struct rte_flow_attr *attr,
 const struct rte_flow_action *actions)
 {
struct rte_flow *flow;
-   size_t fdsz;
+   const struct rte_flow_conv_rule rule = {
+   { attr }, { items }, { actions },
+   };
+   struct rte_flow_error error;
+   int ret;
 
-   fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
-   flow = rte_zmalloc(NULL,
-  sizeof(struct rte_flow) + fdsz,
+   ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, &error);
+   if (ret < 0) {
+   ERROR("Unable to compute flow description size (%s): %s",
+ error.message ? error.message : "unspecified",
+ strerror(rte_errno));
+   return NULL;
+   }
+   flow = rte_zmalloc(NULL, offsetof(struct rte_flow, fd) + ret,
   RTE_CACHE_LINE_SIZE);
if (flow == NULL) {
ERROR("Could not allocate new flow");
return NULL;
}
-   flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
-   if (rte_flow_copy(flow->fd, fdsz, attr, items, actions) != fdsz) {
-   ERROR("Failed to copy flow description");
+   ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, &flow->fd, ret, &rule,
+   &error);
+   if (ret < 0) {
+   ERROR("Failed to copy flow description (%s): %s",
+ 

[dpdk-dev] [PATCH v1 1/7] ethdev: expose flow API error helper

2017-10-05 Thread Adrien Mazarguil
rte_flow_error_set() is a convenient helper to initialize error objects.

Since there is no fundamental reason to prevent applications from using it,
expose it through the public interface after modifying its return value
from positive to negative. This is done for consistency with the rest of
the public interface.

Documentation is updated accordingly.

Signed-off-by: Adrien Mazarguil 
---
 doc/guides/prog_guide/rte_flow.rst | 23 +---
 drivers/net/mlx4/mlx4_flow.c   |  6 +++---
 drivers/net/tap/tap_flow.c |  2 +-
 lib/librte_ether/rte_flow.c| 30 +-
 lib/librte_ether/rte_flow.h| 36 +++
 lib/librte_ether/rte_flow_driver.h | 38 -
 6 files changed, 75 insertions(+), 60 deletions(-)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 662a912..565a809 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1678,6 +1678,25 @@ freed by the application, however its pointer can be 
considered valid only
 as long as its associated DPDK port remains configured. Closing the
 underlying device or unloading the PMD invalidates it.
 
+Helpers
+---
+
+Error initializer
+~
+
+.. code-block:: c
+
+   static inline int
+   rte_flow_error_set(struct rte_flow_error *error,
+  int code,
+  enum rte_flow_error_type type,
+  const void *cause,
+  const char *message);
+
+This function initializes ``error`` (if non-NULL) with the provided
+parameters and sets ``rte_errno`` to ``code``. A negative error ``code`` is
+then returned.
+
 Caveats
 ---
 
@@ -1743,13 +1762,11 @@ the legacy filtering framework, which should eventually 
disappear.
   whatsoever). They only make sure these callbacks are non-NULL or return
   the ``ENOSYS`` (function not supported) error.
 
-This interface additionally defines the following helper functions:
+This interface additionally defines the following helper function:
 
 - ``rte_flow_ops_get()``: get generic flow operations structure from a
   port.
 
-- ``rte_flow_error_set()``: initialize generic flow error structure.
-
 More will be added over time.
 
 Device compatibility
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 0885a91..018843b 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -955,9 +955,9 @@ mlx4_flow_isolate(struct rte_eth_dev *dev,
mlx4_mac_addr_del(priv);
} else if (mlx4_mac_addr_add(priv) < 0) {
priv->isolated = 1;
-   return -rte_flow_error_set(error, rte_errno,
-  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-  NULL, "cannot leave isolated mode");
+   return rte_flow_error_set(error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "cannot leave isolated mode");
}
return 0;
 }
diff --git a/drivers/net/tap/tap_flow.c b/drivers/net/tap/tap_flow.c
index eefa868..a790946 100644
--- a/drivers/net/tap/tap_flow.c
+++ b/drivers/net/tap/tap_flow.c
@@ -1447,7 +1447,7 @@ tap_flow_isolate(struct rte_eth_dev *dev,
return 0;
 error:
pmd->flow_isolate = 0;
-   return -rte_flow_error_set(
+   return rte_flow_error_set(
error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"TC rule creation failed");
 }
diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
index 2001fbb..34ce516 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ether/rte_flow.c
@@ -145,9 +145,9 @@ rte_flow_validate(uint8_t port_id,
return -rte_errno;
if (likely(!!ops->validate))
return ops->validate(dev, attr, pattern, actions, error);
-   return -rte_flow_error_set(error, ENOSYS,
-  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-  NULL, rte_strerror(ENOSYS));
+   return rte_flow_error_set(error, ENOSYS,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, rte_strerror(ENOSYS));
 }
 
 /* Create a flow rule on a given port. */
@@ -183,9 +183,9 @@ rte_flow_destroy(uint8_t port_id,
return -rte_errno;
if (likely(!!ops->destroy))
return ops->destroy(dev, flow, error);
-   return -rte_flow_error_set(error, ENOSYS,
-  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-  NULL, rte_strerror(ENOSYS));
+   return rte_flow_error_set(error, ENOSYS,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, rte_strerror(ENOSYS));
 }
 
 /* Destroy all flow rules associated with a port

[dpdk-dev] [PATCH v1 3/7] ethdev: add flow API item/action name conversion

2017-10-05 Thread Adrien Mazarguil
This provides a means for applications to retrieve the name of flow pattern
items and actions.

Signed-off-by: Adrien Mazarguil 
---
 doc/guides/prog_guide/rte_flow.rst |  1 +
 lib/librte_ether/rte_flow.c| 62 +
 lib/librte_ether/rte_flow.h| 52 +++
 3 files changed, 115 insertions(+)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 5026730..a346ba5 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1715,6 +1715,7 @@ operations include:
 - Attributes, pattern item or action duplication.
 - Duplication of an entire pattern or list of actions.
 - Duplication of a complete flow rule description.
+- Pattern item or action name retrieval.
 
 Caveats
 ---
diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
index 03a4d35..f4fb607 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ether/rte_flow.c
@@ -465,6 +465,60 @@ rte_flow_conv_rule(struct rte_flow_conv_rule *dst,
 "not enough room for alignment padding");
 }
 
+/** Internal helper to convert an object type to a string. */
+static int
+rte_flow_conv_name(int is_action,
+  char *dst,
+  size_t size,
+  const void *src,
+  struct rte_flow_error *error)
+{
+   const struct {
+   const struct rte_flow_desc_data *data;
+   size_t num;
+   } res_data[2] = {
+   { rte_flow_desc_item, RTE_DIM(rte_flow_desc_item), },
+   { rte_flow_desc_action, RTE_DIM(rte_flow_desc_action), },
+   }, *const res = &res_data[!!is_action];
+   unsigned int obj_type = (uintptr_t)src;
+
+   if (obj_type >= res->num)
+   return rte_flow_error_set
+   (error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+"unknown object type to retrieve name for");
+   return snprintf(dst, size, "%s", res->data[obj_type].name);
+}
+
+/** Internal helper to convert an object type to a pointer to its name. */
+static int
+rte_flow_conv_name_ptr(int is_action,
+  const char **dst,
+  size_t size,
+  const void *src,
+  struct rte_flow_error *error)
+{
+   const struct {
+   const struct rte_flow_desc_data *data;
+   size_t num;
+   } res_data[2] = {
+   { rte_flow_desc_item, RTE_DIM(rte_flow_desc_item), },
+   { rte_flow_desc_action, RTE_DIM(rte_flow_desc_action), },
+   }, *const res = &res_data[!!is_action];
+   unsigned int obj_type = (uintptr_t)src;
+
+   if (obj_type >= res->num)
+   return rte_flow_error_set
+   (error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+"unknown object type to retrieve name for");
+   if (size && size < sizeof(const char **))
+   return rte_flow_error_set
+   (error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+"not enough room for object name pointer");
+   if (size)
+   *((const char **)dst) = res->data[obj_type].name;
+   return sizeof(const char **);
+}
+
 /** Helper function to convert flow API objects. */
 int
 rte_flow_conv(enum rte_flow_conv_op op,
@@ -497,6 +551,14 @@ rte_flow_conv(enum rte_flow_conv_op op,
return rte_flow_conv_actions(dst, size, src, 0, error);
case RTE_FLOW_CONV_OP_RULE:
return rte_flow_conv_rule(dst, size, src, error);
+   case RTE_FLOW_CONV_OP_ITEM_NAME:
+   return rte_flow_conv_name(0, dst, size, src, error);
+   case RTE_FLOW_CONV_OP_ACTION_NAME:
+   return rte_flow_conv_name(1, dst, size, src, error);
+   case RTE_FLOW_CONV_OP_ITEM_NAME_PTR:
+   return rte_flow_conv_name_ptr(0, dst, size, src, error);
+   case RTE_FLOW_CONV_OP_ACTION_NAME_PTR:
+   return rte_flow_conv_name_ptr(1, dst, size, src, error);
}
return rte_flow_error_set
(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 3d06bdc..01c711e 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -1180,6 +1180,58 @@ enum rte_flow_conv_op {
 *   @code struct rte_flow_conv_rule * @endcode
 */
RTE_FLOW_CONV_OP_RULE,
+
+   /**
+* Convert item type to its name string.
+*
+* Writes a NUL-terminated string to @p dst (like snprintf()).
+*
+* - @p src type:
+*   @code (const void *)enum rte_flow_item_type @endcode
+* - @p dst type:
+*   @code char * @endcode
+**/
+   RTE_FLOW_CONV_OP_ITEM_NAME,
+
+   /**
+* Convert action type to its name string.
+*
+* Writes a 

[dpdk-dev] [PATCH v1 4/7] app/testpmd: rely on flow API conversion function

2017-10-05 Thread Adrien Mazarguil
This commit replaces all local information about pattern items and actions
as well as flow rule duplication code with calls to rte_flow_conv().

Signed-off-by: Adrien Mazarguil 
---
 app/test-pmd/config.c  | 293 ++--
 app/test-pmd/testpmd.h |   7 +-
 2 files changed, 66 insertions(+), 234 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 60a8d07..11b9a0f 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -918,213 +918,36 @@ port_mtu_set(portid_t port_id, uint16_t mtu)
 
 /* Generic flow management functions. */
 
-/** Generate flow_item[] entry. */
-#define MK_FLOW_ITEM(t, s) \
-   [RTE_FLOW_ITEM_TYPE_ ## t] = { \
-   .name = # t, \
-   .size = s, \
-   }
-
-/** Information about known flow pattern items. */
-static const struct {
-   const char *name;
-   size_t size;
-} flow_item[] = {
-   MK_FLOW_ITEM(END, 0),
-   MK_FLOW_ITEM(VOID, 0),
-   MK_FLOW_ITEM(INVERT, 0),
-   MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)),
-   MK_FLOW_ITEM(PF, 0),
-   MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)),
-   MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)),
-   MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), /* +pattern[] */
-   MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)),
-   MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)),
-   MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)),
-   MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)),
-   MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)),
-   MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)),
-   MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)),
-   MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)),
-   MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)),
-   MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)),
-   MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)),
-   MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)),
-   MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)),
-   MK_FLOW_ITEM(FUZZY, sizeof(struct rte_flow_item_fuzzy)),
-};
-
-/** Compute storage space needed by item specification. */
-static void
-flow_item_spec_size(const struct rte_flow_item *item,
-   size_t *size, size_t *pad)
-{
-   if (!item->spec) {
-   *size = 0;
-   goto empty;
-   }
-   switch (item->type) {
-   union {
-   const struct rte_flow_item_raw *raw;
-   } spec;
-
-   case RTE_FLOW_ITEM_TYPE_RAW:
-   spec.raw = item->spec;
-   *size = offsetof(struct rte_flow_item_raw, pattern) +
-   spec.raw->length * sizeof(*spec.raw->pattern);
-   break;
-   default:
-   *size = flow_item[item->type].size;
-   break;
-   }
-empty:
-   *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
-}
-
-/** Generate flow_action[] entry. */
-#define MK_FLOW_ACTION(t, s) \
-   [RTE_FLOW_ACTION_TYPE_ ## t] = { \
-   .name = # t, \
-   .size = s, \
-   }
-
-/** Information about known flow actions. */
-static const struct {
-   const char *name;
-   size_t size;
-} flow_action[] = {
-   MK_FLOW_ACTION(END, 0),
-   MK_FLOW_ACTION(VOID, 0),
-   MK_FLOW_ACTION(PASSTHRU, 0),
-   MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)),
-   MK_FLOW_ACTION(FLAG, 0),
-   MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
-   MK_FLOW_ACTION(DROP, 0),
-   MK_FLOW_ACTION(COUNT, 0),
-   MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)),
-   MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */
-   MK_FLOW_ACTION(PF, 0),
-   MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
-};
-
-/** Compute storage space needed by action configuration. */
-static void
-flow_action_conf_size(const struct rte_flow_action *action,
- size_t *size, size_t *pad)
-{
-   if (!action->conf) {
-   *size = 0;
-   goto empty;
-   }
-   switch (action->type) {
-   union {
-   const struct rte_flow_action_rss *rss;
-   } conf;
-
-   case RTE_FLOW_ACTION_TYPE_RSS:
-   conf.rss = action->conf;
-   *size = offsetof(struct rte_flow_action_rss, queue) +
-   conf.rss->num * sizeof(*conf.rss->queue);
-   break;
-   default:
-   *size = flow_action[action->type].size;
-   break;
-   }
-empty:
-   *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
-}
-
 /** Generate a port_flow entry from attributes/pattern/actions. */
 static struct port_flow *
 port_flow_new(const struct rte_flow_attr *attr,
  const struct rte_flow_item *pattern,
- const struct rt

[dpdk-dev] [PATCH v1 6/7] ethdev: generate flow API conversion header

2017-10-05 Thread Adrien Mazarguil
Add script and build target to generate rte_flow_conv.h (used by
rte_flow_conv()) from rte_flow.h.

Although the resulting header file is internal and not public, this is not
done automatically since it is versioned in the source tree.

Developers can update it after making changes to rte_flow.h by running:

 make lib/librte_ether_sub RTE_MAKE_SUBTARGET=rte_flow_conv.h

Signed-off-by: Adrien Mazarguil 
---
 MAINTAINERS   |   1 +
 buildtools/gen-rte_flow_conv-h.sh | 264 +
 lib/librte_ether/Makefile |  10 ++
 3 files changed, 275 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9eec984..002b54e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -253,6 +253,7 @@ F: devtools/test-null.sh
 
 Flow API
 M: Adrien Mazarguil 
+F: buildtools/gen-rte_flow_conv-h.sh
 F: lib/librte_ether/rte_flow*
 
 Traffic Management API - EXPERIMENTAL
diff --git a/buildtools/gen-rte_flow_conv-h.sh 
b/buildtools/gen-rte_flow_conv-h.sh
new file mode 100755
index 000..482e733
--- /dev/null
+++ b/buildtools/gen-rte_flow_conv-h.sh
@@ -0,0 +1,264 @@
+#!/bin/sh -e
+#
+#   BSD LICENSE
+#
+#   Copyright 2017 6WIND S.A.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of 6WIND S.A. nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This script generates an internal flow API header file needed by
+# conversion function rte_flow_conv().
+#
+# This is done by feeding rte_flow.h to the C preprocessor and transforming
+# its output into a different C header file whose name is provided as a
+# command-line argument.
+#
+# CC, CPPFLAGS, CFLAGS, EXTRA_CPPFLAGS and EXTRA_CFLAGS are taken from the
+# environment.
+
+# Check command-line parameters and environment.
+: ${CC:=cc}
+output=${1:?missing output file name}
+
+# Generate pattern for C punctuators.
+punctuators=$(printf '%s' \
+'[ ] ( ) { } . ->
+++ -- & * + - ~ !
+/ % << >> < > <= >= == != ^ | && ||
+? : ; ...
+= *= /= %= += -= <<= >>= &= ^= |=
+, # ##
+<: :> <% %> %: %: %:' |
+   tr '\n' ' ' |
+   sed -ne 's/[[/.*\?]/\\&/g' -e 's/[[:space:]]\+/\\|/gp')
+
+# Generate include guard.
+guard=$(printf '%s_' "${output##*/}" |
+   tr [[:lower:]] [[:upper:]] |
+   sed -e 's/[^A-Z0-9]/_/g' -e 's/^.\{,4\}$//')
+
+# Retrieve C preprocessor output and expand it to one token per line.
+preprocess ()
+{
+   {
+   temp=/tmp/${0##*/}.$$.c
+   printf '#include "rte_flow.h"' > "$temp"
+   ${CC} ${CPPFLAGS} ${EXTRA_CPPFLAGS} \
+   ${CFLAGS} ${EXTRA_CFLAGS} -E "$temp"
+   rm -f "$temp"
+   } |
+   sed -e '
+/^[[:space:]]*#/d
+s/[[:space:]]\+/\n/g
+s/'"$punctuators"'/\n&\n/g
+' |
+   sed -e '/^[[:space:]]*$/d'
+}
+
+# Retrieve defined pattern items and actions.
+items=''
+actions=''
+while read -r type name dummy
+do
+   case "$type" in
+   ITEM)
+   items="$items $name"
+   ;;
+   ACTION)
+   actions="$actions $name"
+   ;;
+   esac
+done < "${output}" ||
+exit
+
+# Reuse license header from this script.
+sed -ne '
+/^#.*BSD LICENSE/{
+   i\
+/*-
+   :a
+   /^#/!{
+   i\
+ */
+   q
+   }
+   s/^#/ */
+   p
+   n
+   ba
+}
+' "$0"
+
+# Output includes and structure definitions.
+printf '
+/**
+ * @file
+ * RTE generic flow API (internal)
+ *
+ * This file exports resources needed by rte_flow_conv().
+ *
+ * DO NOT EDIT THIS FILE.
+ *
+ * It was generated from rte_flow.h, run %s to update it.
+ */
+
+#if

[dpdk-dev] [PATCH v1 7/7] ethdev: update flow API conversion header

2017-10-05 Thread Adrien Mazarguil
Synchronize rte_flow_conv() capabilities with rte_flow.h.

Signed-off-by: Adrien Mazarguil 
---
 lib/librte_ether/rte_flow_conv.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/lib/librte_ether/rte_flow_conv.h b/lib/librte_ether/rte_flow_conv.h
index 2244970..7e165a2 100644
--- a/lib/librte_ether/rte_flow_conv.h
+++ b/lib/librte_ether/rte_flow_conv.h
@@ -35,6 +35,10 @@
  * RTE generic flow API (internal)
  *
  * This file exports resources needed by rte_flow_conv().
+ *
+ * DO NOT EDIT THIS FILE.
+ *
+ * It was generated from rte_flow.h, run gen-rte_flow_conv-h.sh to update it.
  */
 
 #ifndef RTE_FLOW_CONV_H_
@@ -228,6 +232,14 @@ static const struct rte_flow_conv_res 
rte_flow_conv_res_item[] = {
.flex_elt_size = 0,
.flex_off = 0,
},
+   [RTE_FLOW_ITEM_TYPE_FUZZY] = {
+   .name = "fuzzy",
+   .size = sizeof(struct rte_flow_item_fuzzy),
+   .flex_len_type = 0,
+   .flex_len_off = 0,
+   .flex_elt_size = 0,
+   .flex_off = 0,
+   },
 };
 
 /** Actions description table. */
-- 
2.1.4



[dpdk-dev] [PATCH v1 5/7] ethdev: enhance flow API item/action descriptions

2017-10-05 Thread Adrien Mazarguil
Enhance description structure with information about embedded flexible
arrays in order to handle items and actions through a common function
without any dedicated code for special cases.

This commit also moves descriptions to a separate header file for clarity.

Signed-off-by: Adrien Mazarguil 
---
 lib/librte_ether/rte_flow.c  | 183 ++-
 lib/librte_ether/rte_flow_conv.h | 333 ++
 2 files changed, 392 insertions(+), 124 deletions(-)

diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
index f4fb607..46f430e 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ether/rte_flow.c
@@ -37,75 +37,14 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include "rte_ethdev.h"
+#include "rte_flow_conv.h"
 #include "rte_flow_driver.h"
 #include "rte_flow.h"
 
-/**
- * Flow elements description tables.
- */
-struct rte_flow_desc_data {
-   const char *name;
-   size_t size;
-};
-
-/** Generate flow_item[] entry. */
-#define MK_FLOW_ITEM(t, s) \
-   [RTE_FLOW_ITEM_TYPE_ ## t] = { \
-   .name = # t, \
-   .size = s, \
-   }
-
-/** Information about known flow pattern items. */
-static const struct rte_flow_desc_data rte_flow_desc_item[] = {
-   MK_FLOW_ITEM(END, 0),
-   MK_FLOW_ITEM(VOID, 0),
-   MK_FLOW_ITEM(INVERT, 0),
-   MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)),
-   MK_FLOW_ITEM(PF, 0),
-   MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)),
-   MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)),
-   MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), /* +pattern[] */
-   MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)),
-   MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)),
-   MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)),
-   MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)),
-   MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)),
-   MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)),
-   MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)),
-   MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)),
-   MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)),
-   MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)),
-   MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)),
-   MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)),
-   MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)),
-};
-
-/** Generate flow_action[] entry. */
-#define MK_FLOW_ACTION(t, s) \
-   [RTE_FLOW_ACTION_TYPE_ ## t] = { \
-   .name = # t, \
-   .size = s, \
-   }
-
-/** Information about known flow actions. */
-static const struct rte_flow_desc_data rte_flow_desc_action[] = {
-   MK_FLOW_ACTION(END, 0),
-   MK_FLOW_ACTION(VOID, 0),
-   MK_FLOW_ACTION(PASSTHRU, 0),
-   MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)),
-   MK_FLOW_ACTION(FLAG, 0),
-   MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
-   MK_FLOW_ACTION(DROP, 0),
-   MK_FLOW_ACTION(COUNT, 0),
-   MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)),
-   MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */
-   MK_FLOW_ACTION(PF, 0),
-   MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
-};
-
 /* Get generic flow operations structure from a port. */
 const struct rte_flow_ops *
 rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error)
@@ -243,59 +182,39 @@ rte_flow_isolate(uint8_t port_id,
  NULL, rte_strerror(ENOSYS));
 }
 
-/** Compute storage space needed by item specification. */
-static void
-flow_item_spec_size(const struct rte_flow_item *item,
-   size_t *size, size_t *pad)
-{
-   if (!item->spec) {
-   *size = 0;
-   goto empty;
-   }
-   switch (item->type) {
-   union {
-   const struct rte_flow_item_raw *raw;
-   } spec;
-
-   /* Not a fall-through */
-   case RTE_FLOW_ITEM_TYPE_RAW:
-   spec.raw = item->spec;
-   *size = offsetof(struct rte_flow_item_raw, pattern) +
-   spec.raw->length * sizeof(*spec.raw->pattern);
-   break;
-   default:
-   *size = rte_flow_desc_item[item->type].size;
-   break;
-   }
-empty:
-   *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
-}
-
-/** Compute storage space needed by action configuration. */
+/** Compute storage space needed for item->spec or action->conf. */
 static void
-flow_action_conf_size(const struct rte_flow_action *action,
- size_t *size, size_t *pad)
+rte_flow_conv_res_size(const struct rte_flow_conv_res *res, const void *obj,
+  size_t *size, size_t *pad)
 {
-   if (!action->conf) {
+   if (!obj) {
*size = 0;
-   goto empty;
-

Re: [dpdk-dev] [PATCH v4 2/9] lib/librte_power: add extra msg type for policies

2017-10-05 Thread Hunt, David



On 5/10/2017 10:21 AM, santosh wrote:

Hi David,


On Thursday 05 October 2017 02:08 PM, Hunt, David wrote:

Hi Santosh,

On 4/10/2017 4:36 PM, santosh wrote:

Hi David,


On Wednesday 04 October 2017 02:45 PM, David Hunt wrote:

Signed-off-by: Nemanja Marjanovic 
Signed-off-by: Rory Sexton 
Signed-off-by: David Hunt 
---

my 2cent:
General comment on implementation approach:
IMO, we should avoid PMD details in common lib area.
example: file channel_commons.h has ifdef clutter referencing
i40e pmds all over.

Perhaps we should introduce opaque handle example void * or introduce pmd
specific callback/handle which points to PMD specific metadata in power library.

Example:
struct channel_packet {
void *pmd_specific_metadata;
}

Or someway via callback (I'm not sure at the moment)
so that we could hide PMD details in common area.

Thanks.

I would agree that PMD specific details are good left to the PMDs, however I 
think that the initial
example should be OK as is, and as new PMDs are added, we can find commonality 
between them
which stays in the example, and any really specific stuff can be pushed back 
behind an opaque.

What about the v5 I submitted (without the #ifdef's)? Are you OK with that for 
this release, and we can
fine tune as other PMDS are added in future releases?


Yes. But in future releases, we should do more code clean up in power lib and 
example area..
meaning; current example implementation uses names like _vsi.. specific to 
intel NICs,
we should remove such naming and their dependency code from example area.

Thanks.


I agree. I plan to clean up the API in the next release of DPDK. For 
example, there are private header files that are called rte_*.h that 
expose private functions to the documentation. These need to be renamed, 
as well as moving some structures around. I can also look at re-naming 
some of the vsi vars to something more generic.

Thanks,
Dave.




Re: [dpdk-dev] [PATCH v3] vhost: Expose virtio interrupt need on rte_vhost API

2017-10-05 Thread Jens Freimann

Adding maintainers to Cc

On Sat, Sep 23, 2017 at 08:31:37PM +, Jan Scheurich wrote:
[...]

Today this information is hidden inside the rte_vhost library
and not accessible to users of the API. This patch adds a
function to the API to query it.


Since you add to the API please also document
it in doc/guides/prog_guide/vhost_lib.rst. 


Apart from that the patch looks good to me.

regards,
Jens 


Re: [dpdk-dev] [PATCH v6 3/3] efd: run-time dispatch over x86 EFD functions

2017-10-05 Thread Li, Xiaoyun
Yes. Sorry about that.

> -Original Message-
> From: Ananyev, Konstantin
> Sent: Thursday, October 5, 2017 17:41
> To: Li, Xiaoyun ; Richardson, Bruce
> 
> Cc: Lu, Wenzhuo ; Zhang, Helin
> ; dev@dpdk.org
> Subject: RE: [PATCH v6 3/3] efd: run-time dispatch over x86 EFD functions
> 
> 
> 
> > +efd_value_t
> > +efd_lookup_internal_avx2(const efd_hashfunc_t *group_hash_idx,
> > +   const efd_lookuptbl_t *group_lookup_table,
> > +   const uint32_t hash_val_a, const uint32_t hash_val_b)
> { #ifdef
> > +CC_SUPPORT_AVX2
> > +   efd_value_t value = 0;
> > +   uint32_t i = 0;
> > +   __m256i vhash_val_a = _mm256_set1_epi32(hash_val_a);
> > +   __m256i vhash_val_b = _mm256_set1_epi32(hash_val_b);
> > +
> > +   for (; i < RTE_EFD_VALUE_NUM_BITS; i += 8) {
> > +   __m256i vhash_idx =
> > +   _mm256_cvtepu16_epi32(EFD_LOAD_SI128(
> > +   (__m128i const *) &group_hash_idx[i]));
> > +   __m256i vlookup_table = _mm256_cvtepu16_epi32(
> > +   EFD_LOAD_SI128((__m128i const *)
> > +   &group_lookup_table[i]));
> > +   __m256i vhash = _mm256_add_epi32(vhash_val_a,
> > +   _mm256_mullo_epi32(vhash_idx,
> vhash_val_b));
> > +   __m256i vbucket_idx = _mm256_srli_epi32(vhash,
> > +   EFD_LOOKUPTBL_SHIFT);
> > +   __m256i vresult = _mm256_srlv_epi32(vlookup_table,
> > +   vbucket_idx);
> > +
> > +   value |= (_mm256_movemask_ps(
> > +   (__m256) _mm256_slli_epi32(vresult, 31))
> > +   & ((1 << (RTE_EFD_VALUE_NUM_BITS - i)) - 1)) << i;
> > +   }
> > +
> > +   return value;
> > +#else
> > +   RTE_SET_USED(group_hash_idx);
> > +   RTE_SET_USED(group_lookup_table);
> > +   RTE_SET_USED(hash_val_a);
> > +   RTE_SET_USED(hash_val_b);
> > +   /* Return dummy value, only to avoid compilation breakage */
> > +   return 0;
> > +#endif
> > +
> > +}
> 
> #ifdef CC_SUPPORT_AVX2 is still there.
> Will wait for v7 I guess.
> Konstantin


[dpdk-dev] [PATCH v7 0/6] Support TCP/IPv4, VxLAN, and GRE GSO in DPDK

2017-10-05 Thread Mark Kavanagh
Generic Segmentation Offload (GSO) is a SW technique to split large
packets into small ones. Akin to TSO, GSO enables applications to
operate on large packets, thus reducing per-packet processing overhead.

To enable more flexibility to applications, DPDK GSO is implemented
as a standalone library. Applications explicitly use the GSO library
to segment packets. This patch adds GSO support to DPDK for specific
packet types: specifically, TCP/IPv4, VxLAN, and GRE.

The first patch introduces the GSO API framework. The second patch
adds GSO support for TCP/IPv4 packets (containing an optional VLAN
tag). The third patch adds GSO support for VxLAN packets that contain
outer IPv4, and inner TCP/IPv4 headers (plus optional inner and/or 
outer VLAN tags). The fourth patch adds GSO support for GRE packets
that contain outer IPv4, and inner TCP/IPv4 headers (with optional 
outer VLAN tag). The fifth patch in the series enables TCP/IPv4, VxLAN,
and GRE GSO in testpmd's checksum forwarding engine. The final patch
in the series adds GSO documentation to the programmer's guide.

Performance Testing
===
The performance of TCP/IPv4 GSO on a 10Gbps link is demonstrated using
iperf. Setup for the test is described as follows:

a. Connect 2 x 10Gbps physical ports (P0, P1), which are in the same
   machine, together physically.
b. Launch testpmd with P0 and a vhost-user port, and use csum
   forwarding engine with "retry".
c. Select IP and TCP HW checksum calculation for P0; select TCP HW
   checksum calculation for vhost-user port.
d. Launch a VM with csum and tso offloading enabled.
e. Run iperf-client on virtio-net port in the VM to send TCP packets.
   With enabling csum and tso, the VM can send large TCP/IPv4 packets
   (mss is up to 64KB).
f. P1 is assigned to linux kernel and enabled kernel GRO. Run
   iperf-server on P1.

We conduct three iperf tests:

test-1: enable GSO for P0 in testpmd, and set max GSO segment length
to 1518B. Run two iperf-client in the VM.
test-2: enable TSO for P0 in testpmd, and set TSO segsz to 1518B. Run
two iperf-client in the VM.
test-3: disable GSO and TSO in testpmd. Run two iperf-client in the VM.

Throughput of the above three tests:

test-1: 9.4Gbps
test-2: 9.5Gbps
test-3: 3Mbps

Functional Testing
==
Unlike TCP packets, VMs can't send large VxLAN or GRE packets. The max
length of tunneled packets from VMs is 1514B. So current experiment
method can't be used to measure VxLAN and GRE GSO performance, but simply
test the functionality via setting small GSO segment length (e.g. 500B).

VxLAN
-
To test VxLAN GSO functionality, we use the following setup:

a. Connect 2 x 10Gbps physical ports (P0, P1), which are in the same
   machine, together physically.
b. Launch testpmd with P0 and a vhost-user port, and use csum forwarding
   engine with "retry".
c. Testpmd commands:
- csum parse_tunnel on "P0"
- csum parse_tunnel on "vhost-user port"
- csum set outer-ip hw "P0"
- csum set ip hw "P0"
- csum set tcp hw "P0"
- csum set tcp hw "vhost-user port"
- set port "P0" gso on
- set gso segsz 500
d. Launch a VM with csum and tso offloading enabled.
e. Create a vxlan port for the virtio-net port in the VM. Run iperf-client
   on the VxLAN port, so TCP packets are VxLAN encapsulated. However, the
   max packet length is 1514B.
f. P1 is assigned to linux kernel and kernel GRO is disabled. Similarly,
   create a VxLAN port for P1, and run iperf-server on the VxLAN port.

In testpmd, we can see the length of all packets sent from P0 is smaller
than or equal to 500B. Additionally, the packets arriving in P1 is
encapsulated and is smaller than or equal to 500B.

GRE
---
The same process may be used to test GRE functionality, with the exception that
the tunnel type created for both the guest's virtio-net, and the host's kernel
interfaces is GRE:
   `ip tunnel add  mode gre remote  local `

As in the VxLAN testcase, the length of packets sent from P0, and received on
P1, is less than 500B.

Change log
==
v7:
- add RTE_GSO_SEG_SIZE_MIN macro; use this to validate gso_ctx.gso_segsz.
- rename 'ipid_flag' member of gso_ctx to 'flag'.
- remove mention of VLAN tags in supported packet types.
- don't clear PKT_TX_TCP_SEG flag if GSO fails.
- take all packet overhead into account when checking for empty packet.
- ensure that only enabled GSO types are enacted upon (i.e. no fall-through to
  TCP/IPv4 case from tunneled case).
- validate user-supplied gso segsz arg against RTE_GSO_SEG_SIZE_MIN in testpmd.
- simplify error-checking/handling for GSO failure case in testpmd csum engine.
- use 0 instead of !RTE_GSO_IPID_FIXED in testpmd.

v6:
- rebase to HEAD of master (i5dce9fcA)
- remove 'l3_offset' parameter from 'update_ipv4_tcp_headers'

v5:
- add GSO section to the programmer's guide.
- use MF or (previously 'and') offset to check if a packet is IP
  fragmented.
- move 'update_header' helper functions to gso_common.h.
- move txp/ip

[dpdk-dev] [PATCH v7 4/6] gso: add GRE GSO support

2017-10-05 Thread Mark Kavanagh
This patch adds GSO support for GRE-tunneled packets. Supported GRE
packets must contain an outer IPv4 header, and inner TCP/IPv4 headers.
They may also contain a single VLAN tag. GRE GSO doesn't check if all
input packets have correct checksums and doesn't update checksums for
output packets. Additionally, it doesn't process IP fragmented packets.

As with VxLAN GSO, GRE GSO uses a two-segment MBUF to organize each
output packet, which requires multi-segment mbuf support in the TX
functions of the NIC driver. Also, if a packet is GSOed, GRE GSO reduces
its MBUF refcnt by 1. As a result, when all of its GSOed segments are
freed, the packet is freed automatically.

Signed-off-by: Mark Kavanagh 
Signed-off-by: Jiayu Hu 
---
 doc/guides/rel_notes/release_17_11.rst |  2 ++
 lib/librte_gso/gso_common.h|  5 +
 lib/librte_gso/gso_tunnel_tcp4.c   | 14 ++
 lib/librte_gso/rte_gso.c   |  9 ++---
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/doc/guides/rel_notes/release_17_11.rst 
b/doc/guides/rel_notes/release_17_11.rst
index c58eeb1..2faa630 100644
--- a/doc/guides/rel_notes/release_17_11.rst
+++ b/doc/guides/rel_notes/release_17_11.rst
@@ -50,6 +50,8 @@ New Features
   * TCP/IPv4 packets.
   * VxLAN packets, which must have an outer IPv4 header, and contain
 an inner TCP/IPv4 packet.
+  * GRE packets, which must contain an outer IPv4 header, and inner
+TCP/IPv4 headers.
 
   The GSO library doesn't check if the input packets have correct
   checksums, and doesn't update checksums for output packets.
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index 95d54e7..145ea49 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -55,6 +55,11 @@
(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
 PKT_TX_TUNNEL_VXLAN))
 
+#define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
+   PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_GRE)) == \
+   (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
+PKT_TX_TUNNEL_GRE))
+
 /**
  * Internal function which updates the UDP header of a packet, following
  * segmentation. This is required to update the header's datagram length field.
diff --git a/lib/librte_gso/gso_tunnel_tcp4.c b/lib/librte_gso/gso_tunnel_tcp4.c
index 5e8c8e5..8d0cfd7 100644
--- a/lib/librte_gso/gso_tunnel_tcp4.c
+++ b/lib/librte_gso/gso_tunnel_tcp4.c
@@ -42,11 +42,13 @@
struct tcp_hdr *tcp_hdr;
uint32_t sent_seq;
uint16_t outer_id, inner_id, tail_idx, i;
-   uint16_t outer_ipv4_offset, inner_ipv4_offset, udp_offset, tcp_offset;
+   uint16_t outer_ipv4_offset, inner_ipv4_offset;
+   uint16_t udp_gre_offset, tcp_offset;
+   uint8_t update_udp_hdr;
 
outer_ipv4_offset = pkt->outer_l2_len;
-   udp_offset = outer_ipv4_offset + pkt->outer_l3_len;
-   inner_ipv4_offset = udp_offset + pkt->l2_len;
+   udp_gre_offset = outer_ipv4_offset + pkt->outer_l3_len;
+   inner_ipv4_offset = udp_gre_offset + pkt->l2_len;
tcp_offset = inner_ipv4_offset + pkt->l3_len;
 
/* Outer IPv4 header. */
@@ -63,9 +65,13 @@
sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
tail_idx = nb_segs - 1;
 
+   /* Only update UDP header for VxLAN packets. */
+   update_udp_hdr = (pkt->ol_flags & PKT_TX_TUNNEL_VXLAN) ? 1 : 0;
+
for (i = 0; i < nb_segs; i++) {
update_ipv4_header(segs[i], outer_ipv4_offset, outer_id);
-   update_udp_header(segs[i], udp_offset);
+   if (update_udp_hdr)
+   update_udp_header(segs[i], udp_gre_offset);
update_ipv4_header(segs[i], inner_ipv4_offset, inner_id);
update_tcp_header(segs[i], tcp_offset, sent_seq, i < tail_idx);
outer_id++;
diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
index a6f38e2..1d4082a 100644
--- a/lib/librte_gso/rte_gso.c
+++ b/lib/librte_gso/rte_gso.c
@@ -61,7 +61,8 @@
if ((gso_ctx->gso_size < RTE_GSO_SEG_SIZE_MIN) ||
(gso_ctx->gso_size >= pkt->pkt_len) ||
(gso_ctx->gso_types & (DEV_TX_OFFLOAD_TCP_TSO |
-  DEV_TX_OFFLOAD_VXLAN_TNL_TSO)) !=
+  DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+  DEV_TX_OFFLOAD_GRE_TNL_TSO)) !=
gso_ctx->gso_types) {
pkt->ol_flags &= (~PKT_TX_TCP_SEG);
pkts_out[0] = pkt;
@@ -74,8 +75,10 @@
ipid_delta = (gso_ctx->flag != RTE_GSO_FLAG_IPID_FIXED);
ol_flags = pkt->ol_flags;
 
-   if (IS_IPV4_VXLAN_TCP4(pkt->ol_flags)
-   && (gso_ctx->gso_types & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)) {
+   if ((IS_IPV4_VXLAN_TCP4(pkt->ol_flags) &&
+   (gso_ctx->gso_types & DEV_TX_OFFLO

[dpdk-dev] [PATCH v7 2/6] gso: add TCP/IPv4 GSO support

2017-10-05 Thread Mark Kavanagh
From: Jiayu Hu 

This patch adds GSO support for TCP/IPv4 packets. Supported packets
may include a single VLAN tag. TCP/IPv4 GSO doesn't check if input
packets have correct checksums, and doesn't update checksums for
output packets (the responsibility for this lies with the application).
Additionally, TCP/IPv4 GSO doesn't process IP fragmented packets.

TCP/IPv4 GSO uses two chained MBUFs, one direct MBUF and one indirect
MBUF, to organize an output packet. Note that we refer to these two
chained MBUFs as a two-segment MBUF. The direct MBUF stores the packet
header, while the indirect mbuf simply points to a location within the
original packet's payload. Consequently, use of the GSO library requires
multi-segment MBUF support in the TX functions of the NIC driver.

If a packet is GSO'd, TCP/IPv4 GSO reduces its MBUF refcnt by 1. As a
result, when all of its GSOed segments are freed, the packet is freed
automatically.

Signed-off-by: Jiayu Hu 
Signed-off-by: Mark Kavanagh 
Tested-by: Lei Yao 
---
 doc/guides/rel_notes/release_17_11.rst  |  12 +++
 lib/Makefile|   2 +-
 lib/librte_eal/common/include/rte_log.h |   1 +
 lib/librte_gso/Makefile |   2 +
 lib/librte_gso/gso_common.c | 153 
 lib/librte_gso/gso_common.h | 141 +
 lib/librte_gso/gso_tcp4.c   | 104 ++
 lib/librte_gso/gso_tcp4.h   |  74 +++
 lib/librte_gso/rte_gso.c|  52 ++-
 lib/librte_gso/rte_gso.h|   7 +-
 10 files changed, 543 insertions(+), 5 deletions(-)
 create mode 100644 lib/librte_gso/gso_common.c
 create mode 100644 lib/librte_gso/gso_common.h
 create mode 100644 lib/librte_gso/gso_tcp4.c
 create mode 100644 lib/librte_gso/gso_tcp4.h

diff --git a/doc/guides/rel_notes/release_17_11.rst 
b/doc/guides/rel_notes/release_17_11.rst
index 5bb36b7..dd37169 100644
--- a/doc/guides/rel_notes/release_17_11.rst
+++ b/doc/guides/rel_notes/release_17_11.rst
@@ -41,6 +41,18 @@ New Features
  Also, make sure to start the actual text at the margin.
  =
 
+* **Added the Generic Segmentation Offload Library.**
+
+  Added the Generic Segmentation Offload (GSO) library to enable
+  applications to split large packets (e.g. MTU is 64KB) into small
+  ones (e.g. MTU is 1500B). Supported packet types are:
+
+  * TCP/IPv4 packets.
+
+  The GSO library doesn't check if the input packets have correct
+  checksums, and doesn't update checksums for output packets.
+  Additionally, the GSO library doesn't process IP fragmented packets.
+
 
 Resolved Issues
 ---
diff --git a/lib/Makefile b/lib/Makefile
index 3d123f4..5ecd1b3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -109,7 +109,7 @@ DEPDIRS-librte_reorder := librte_eal librte_mempool 
librte_mbuf
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
 DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether
 DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso
-DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net
+DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net 
librte_mempool
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_eal/common/include/rte_log.h 
b/lib/librte_eal/common/include/rte_log.h
index ec8dba7..2fa1199 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -87,6 +87,7 @@ struct rte_logs {
 #define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
 #define RTE_LOGTYPE_EFD   18 /**< Log related to EFD. */
 #define RTE_LOGTYPE_EVENTDEV  19 /**< Log related to eventdev. */
+#define RTE_LOGTYPE_GSO   20 /**< Log related to GSO. */
 
 /* these log types can be used in an application */
 #define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */
diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
index aeaacbc..2be64d1 100644
--- a/lib/librte_gso/Makefile
+++ b/lib/librte_gso/Makefile
@@ -42,6 +42,8 @@ LIBABIVER := 1
 
 #source files
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
diff --git a/lib/librte_gso/gso_common.c b/lib/librte_gso/gso_common.c
new file mode 100644
index 000..ee75d4c
--- /dev/null
+++ b/lib/librte_gso/gso_common.c
@@ -0,0 +1,153 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the 

[dpdk-dev] [PATCH v7 1/6] gso: add Generic Segmentation Offload API framework

2017-10-05 Thread Mark Kavanagh
From: Jiayu Hu 

Generic Segmentation Offload (GSO) is a SW technique to split large
packets into small ones. Akin to TSO, GSO enables applications to
operate on large packets, thus reducing per-packet processing overhead.

To enable more flexibility to applications, DPDK GSO is implemented
as a standalone library. Applications explicitly use the GSO library
to segment packets. To segment a packet requires two steps. The first
is to set proper flags to mbuf->ol_flags, where the flags are the same
as that of TSO. The second is to call the segmentation API,
rte_gso_segment(). This patch introduces the GSO API framework to DPDK.

rte_gso_segment() splits an input packet into small ones in each
invocation. The GSO library refers to these small packets generated
by rte_gso_segment() as GSO segments. Each of the newly-created GSO
segments is organized as a two-segment MBUF, where the first segment is a
standard MBUF, which stores a copy of packet header, and the second is an
indirect MBUF which points to a section of data in the input packet.
rte_gso_segment() reduces the refcnt of the input packet by 1. Therefore,
when all GSO segments are freed, the input packet is freed automatically.
Additionally, since each GSO segment has multiple MBUFs (i.e. 2 MBUFs),
the driver of the interface which the GSO segments are sent to should
support to transmit multi-segment packets.

The GSO framework clears the PKT_TX_TCP_SEG flag for both the input
packet, and all produced GSO segments in the event of success, since
segmentation in hardware is no longer required at that point.

Signed-off-by: Jiayu Hu 
Signed-off-by: Mark Kavanagh 
---
 config/common_base |   5 ++
 doc/api/doxy-api-index.md  |   1 +
 doc/api/doxy-api.conf  |   1 +
 doc/guides/rel_notes/release_17_11.rst |   1 +
 lib/Makefile   |   2 +
 lib/librte_gso/Makefile|  49 +++
 lib/librte_gso/rte_gso.c   |  52 
 lib/librte_gso/rte_gso.h   | 143 +
 lib/librte_gso/rte_gso_version.map |   7 ++
 mk/rte.app.mk  |   1 +
 10 files changed, 262 insertions(+)
 create mode 100644 lib/librte_gso/Makefile
 create mode 100644 lib/librte_gso/rte_gso.c
 create mode 100644 lib/librte_gso/rte_gso.h
 create mode 100644 lib/librte_gso/rte_gso_version.map

diff --git a/config/common_base b/config/common_base
index 12f6be9..58ca5c0 100644
--- a/config/common_base
+++ b/config/common_base
@@ -653,6 +653,11 @@ CONFIG_RTE_LIBRTE_IP_FRAG_TBL_STAT=n
 CONFIG_RTE_LIBRTE_GRO=y
 
 #
+# Compile GSO library
+#
+CONFIG_RTE_LIBRTE_GSO=y
+
+#
 # Compile librte_meter
 #
 CONFIG_RTE_LIBRTE_METER=y
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 19e0d4f..6512918 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -101,6 +101,7 @@ The public API headers are grouped by topics:
   [TCP](@ref rte_tcp.h),
   [UDP](@ref rte_udp.h),
   [GRO](@ref rte_gro.h),
+  [GSO](@ref rte_gso.h),
   [frag/reass] (@ref rte_ip_frag.h),
   [LPM IPv4 route] (@ref rte_lpm.h),
   [LPM IPv6 route] (@ref rte_lpm6.h),
diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf
index 823554f..408f2e6 100644
--- a/doc/api/doxy-api.conf
+++ b/doc/api/doxy-api.conf
@@ -47,6 +47,7 @@ INPUT   = doc/api/doxy-api-index.md \
   lib/librte_ether \
   lib/librte_eventdev \
   lib/librte_gro \
+  lib/librte_gso \
   lib/librte_hash \
   lib/librte_ip_frag \
   lib/librte_jobstats \
diff --git a/doc/guides/rel_notes/release_17_11.rst 
b/doc/guides/rel_notes/release_17_11.rst
index f6f9169..5bb36b7 100644
--- a/doc/guides/rel_notes/release_17_11.rst
+++ b/doc/guides/rel_notes/release_17_11.rst
@@ -174,6 +174,7 @@ The libraries prepended with a plus sign were incremented 
in this version.
  librte_ethdev.so.7
  librte_eventdev.so.2
  librte_gro.so.1
+   + librte_gso.so.1
  librte_hash.so.2
  librte_ip_frag.so.1
  librte_jobstats.so.1
diff --git a/lib/Makefile b/lib/Makefile
index 86caba1..3d123f4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -108,6 +108,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
 DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
 DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether
+DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso
+DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
new file mode 100644
index 000..aeaacbc
--- /dev/null
+++ b/lib/libr

[dpdk-dev] [PATCH v7 3/6] gso: add VxLAN GSO support

2017-10-05 Thread Mark Kavanagh
This patch adds a framework that allows GSO on tunneled packets.
Furthermore, it leverages that framework to provide GSO support for
VxLAN-encapsulated packets.

Supported VxLAN packets must have an outer IPv4 header (prepended by an
optional VLAN tag), and contain an inner TCP/IPv4 packet (with an optional
inner VLAN tag).

VxLAN GSO doesn't check if input packets have correct checksums and
doesn't update checksums for output packets. Additionally, it doesn't
process IP fragmented packets.

As with TCP/IPv4 GSO, VxLAN GSO uses a two-segment MBUF to organize each
output packet, which mandates support for multi-segment mbufs in the TX
functions of the NIC driver. Also, if a packet is GSOed, VxLAN GSO
reduces its MBUF refcnt by 1. As a result, when all of its GSO'd segments
are freed, the packet is freed automatically.

Signed-off-by: Mark Kavanagh 
Signed-off-by: Jiayu Hu 
---
 doc/guides/rel_notes/release_17_11.rst |   2 +
 lib/librte_gso/Makefile|   1 +
 lib/librte_gso/gso_common.h|  25 +++
 lib/librte_gso/gso_tunnel_tcp4.c   | 120 +
 lib/librte_gso/gso_tunnel_tcp4.h   |  75 +
 lib/librte_gso/rte_gso.c   |  14 +++-
 6 files changed, 235 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_gso/gso_tunnel_tcp4.c
 create mode 100644 lib/librte_gso/gso_tunnel_tcp4.h

diff --git a/doc/guides/rel_notes/release_17_11.rst 
b/doc/guides/rel_notes/release_17_11.rst
index dd37169..c58eeb1 100644
--- a/doc/guides/rel_notes/release_17_11.rst
+++ b/doc/guides/rel_notes/release_17_11.rst
@@ -48,6 +48,8 @@ New Features
   ones (e.g. MTU is 1500B). Supported packet types are:
 
   * TCP/IPv4 packets.
+  * VxLAN packets, which must have an outer IPv4 header, and contain
+an inner TCP/IPv4 packet.
 
   The GSO library doesn't check if the input packets have correct
   checksums, and doesn't update checksums for output packets.
diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
index 2be64d1..e6d41df 100644
--- a/lib/librte_gso/Makefile
+++ b/lib/librte_gso/Makefile
@@ -44,6 +44,7 @@ LIBABIVER := 1
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index a8ad638..95d54e7 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define IS_FRAGMENTED(frag_off) (((frag_off) & IPV4_HDR_OFFSET_MASK) != 0 \
|| ((frag_off) & IPV4_HDR_MF_FLAG) == IPV4_HDR_MF_FLAG)
@@ -49,6 +50,30 @@
 #define IS_IPV4_TCP(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4)) == \
(PKT_TX_TCP_SEG | PKT_TX_IPV4))
 
+#define IS_IPV4_VXLAN_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
+   PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_VXLAN)) == \
+   (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
+PKT_TX_TUNNEL_VXLAN))
+
+/**
+ * Internal function which updates the UDP header of a packet, following
+ * segmentation. This is required to update the header's datagram length field.
+ *
+ * @param pkt
+ *  The packet containing the UDP header.
+ * @param udp_offset
+ *  The offset of the UDP header from the start of the packet.
+ */
+static inline void
+update_udp_header(struct rte_mbuf *pkt, uint16_t udp_offset)
+{
+   struct udp_hdr *udp_hdr;
+
+   udp_hdr = (struct udp_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+   udp_offset);
+   udp_hdr->dgram_len = rte_cpu_to_be_16(pkt->pkt_len - udp_offset);
+}
+
 /**
  * Internal function which updates the TCP header of a packet, following
  * segmentation. This is required to update the header's 'sent' sequence
diff --git a/lib/librte_gso/gso_tunnel_tcp4.c b/lib/librte_gso/gso_tunnel_tcp4.c
new file mode 100644
index 000..5e8c8e5
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel_tcp4.c
@@ -0,0 +1,120 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *

[dpdk-dev] [PATCH v7 5/6] app/testpmd: enable TCP/IPv4, VxLAN and GRE GSO

2017-10-05 Thread Mark Kavanagh
From: Jiayu Hu 

This patch adds GSO support to the csum forwarding engine. Oversized
packets transmitted over a GSO-enabled port will undergo segmentation
(with the exception of packet-types unsupported by the GSO library).
GSO support is disabled by default.

GSO support may be toggled on a per-port basis, using the command:

"set port  gso on|off"

The maximum packet length (including the packet header and payload) for
GSO segments may be set with the command:

"set gso segsz "

Show GSO configuration for a given port with the command:

"show port  gso"

Signed-off-by: Jiayu Hu 
Signed-off-by: Mark Kavanagh 
---
 app/test-pmd/cmdline.c  | 179 
 app/test-pmd/config.c   |  24 
 app/test-pmd/csumonly.c |  43 ++-
 app/test-pmd/testpmd.c  |  13 ++
 app/test-pmd/testpmd.h  |  10 ++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  46 +++
 6 files changed, 311 insertions(+), 4 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index ccdf239..92e6171 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -431,6 +431,17 @@ static void cmd_help_long_parsed(void *parsed_result,
"Set max flow number and max packet number per-flow"
" for GRO.\n\n"
 
+   "set port (port_id) gso (on|off)"
+   "Enable or disable Generic Segmentation Offload in"
+   " csum forwarding engine.\n\n"
+
+   "set gso segsz (length)\n"
+   "Set max packet length for output GSO segments,"
+   " including packet header and payload.\n\n"
+
+   "show port (port_id) gso\n"
+   "Show GSO configuration.\n\n"
+
"set fwd (%s)\n"
"Set packet forwarding mode.\n\n"
 
@@ -3967,6 +3978,171 @@ struct cmd_gro_set_result {
},
 };
 
+/* *** ENABLE/DISABLE GSO *** */
+struct cmd_gso_enable_result {
+   cmdline_fixed_string_t cmd_set;
+   cmdline_fixed_string_t cmd_port;
+   cmdline_fixed_string_t cmd_keyword;
+   cmdline_fixed_string_t cmd_mode;
+   uint8_t cmd_pid;
+};
+
+static void
+cmd_gso_enable_parsed(void *parsed_result,
+   __attribute__((unused)) struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   struct cmd_gso_enable_result *res;
+
+   res = parsed_result;
+   if (!strcmp(res->cmd_keyword, "gso"))
+   setup_gso(res->cmd_mode, res->cmd_pid);
+}
+
+cmdline_parse_token_string_t cmd_gso_enable_set =
+   TOKEN_STRING_INITIALIZER(struct cmd_gso_enable_result,
+   cmd_set, "set");
+cmdline_parse_token_string_t cmd_gso_enable_port =
+   TOKEN_STRING_INITIALIZER(struct cmd_gso_enable_result,
+   cmd_port, "port");
+cmdline_parse_token_string_t cmd_gso_enable_keyword =
+   TOKEN_STRING_INITIALIZER(struct cmd_gso_enable_result,
+   cmd_keyword, "gso");
+cmdline_parse_token_string_t cmd_gso_enable_mode =
+   TOKEN_STRING_INITIALIZER(struct cmd_gso_enable_result,
+   cmd_mode, "on#off");
+cmdline_parse_token_num_t cmd_gso_enable_pid =
+   TOKEN_NUM_INITIALIZER(struct cmd_gso_enable_result,
+   cmd_pid, UINT8);
+
+cmdline_parse_inst_t cmd_gso_enable = {
+   .f = cmd_gso_enable_parsed,
+   .data = NULL,
+   .help_str = "set port  gso on|off",
+   .tokens = {
+   (void *)&cmd_gso_enable_set,
+   (void *)&cmd_gso_enable_port,
+   (void *)&cmd_gso_enable_pid,
+   (void *)&cmd_gso_enable_keyword,
+   (void *)&cmd_gso_enable_mode,
+   NULL,
+   },
+};
+
+/* *** SET MAX PACKET LENGTH FOR GSO SEGMENTS *** */
+struct cmd_gso_size_result {
+   cmdline_fixed_string_t cmd_set;
+   cmdline_fixed_string_t cmd_keyword;
+   cmdline_fixed_string_t cmd_segsz;
+   uint16_t cmd_size;
+};
+
+static void
+cmd_gso_size_parsed(void *parsed_result,
+  __attribute__((unused)) struct cmdline *cl,
+  __attribute__((unused)) void *data)
+{
+   struct cmd_gso_size_result *res = parsed_result;
+
+   if (test_done == 0) {
+   printf("Before setting GSO segsz, please first stop 
forwarding\n");
+   return;
+   }
+
+   if (!strcmp(res->cmd_keyword, "gso") &&
+   !strcmp(res->cmd_segsz, "segsz")) {
+   if (res->cmd_size < RTE_GSO_SEG_SIZE_MIN)
+   printf("gso_size should be larger than %lu."
+   " Please input a legal value\n",
+   RTE_GSO_SEG_SIZE_MIN);
+   else
+   gso_max_segment_size = 

[dpdk-dev] [PATCH v7 6/6] doc: add GSO programmer's guide

2017-10-05 Thread Mark Kavanagh
Add programmer's guide doc to explain the design and use of the
GSO library.

Signed-off-by: Mark Kavanagh 
Signed-off-by: Jiayu Hu 
---
 MAINTAINERS|   6 +
 .../generic_segmentation_offload_lib.rst   | 256 +++
 .../prog_guide/img/gso-output-segment-format.svg   | 313 ++
 doc/guides/prog_guide/img/gso-three-seg-mbuf.svg   | 477 +
 doc/guides/prog_guide/index.rst|   1 +
 5 files changed, 1053 insertions(+)
 create mode 100644 doc/guides/prog_guide/generic_segmentation_offload_lib.rst
 create mode 100644 doc/guides/prog_guide/img/gso-output-segment-format.svg
 create mode 100644 doc/guides/prog_guide/img/gso-three-seg-mbuf.svg

diff --git a/MAINTAINERS b/MAINTAINERS
index 8df2a7f..8f0a4bd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -644,6 +644,12 @@ M: Jiayu Hu 
 F: lib/librte_gro/
 F: doc/guides/prog_guide/generic_receive_offload_lib.rst
 
+Generic Segmentation Offload
+M: Jiayu Hu 
+M: Mark Kavanagh 
+F: lib/librte_gso/
+F: doc/guides/prog_guide/generic_segmentation_offload_lib.rst
+
 Distributor
 M: Bruce Richardson 
 M: David Hunt 
diff --git a/doc/guides/prog_guide/generic_segmentation_offload_lib.rst 
b/doc/guides/prog_guide/generic_segmentation_offload_lib.rst
new file mode 100644
index 000..5e78f16
--- /dev/null
+++ b/doc/guides/prog_guide/generic_segmentation_offload_lib.rst
@@ -0,0 +1,256 @@
+..  BSD LICENSE
+Copyright(c) 2017 Intel Corporation. All rights reserved.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+* Neither the name of Intel Corporation nor the names of its
+contributors may be used to endorse or promote products derived
+from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Generic Segmentation Offload Library
+
+
+Overview
+
+Generic Segmentation Offload (GSO) is a widely used software implementation of
+TCP Segmentation Offload (TSO), which reduces per-packet processing overhead.
+Much like TSO, GSO gains performance by enabling upper layer applications to
+process a smaller number of large packets (e.g. MTU size of 64KB), instead of
+processing higher numbers of small packets (e.g. MTU size of 1500B), thus
+reducing per-packet overhead.
+
+For example, GSO allows guest kernel stacks to transmit over-sized TCP segments
+that far exceed the kernel interface's MTU; this eliminates the need to segment
+packets within the guest, and improves the data-to-overhead ratio of both the
+guest-host link, and PCI bus. The expectation of the guest network stack in 
this
+scenario is that segmentation of egress frames will take place either in the 
NIC
+HW, or where that hardware capability is unavailable, either in the host
+application, or network stack.
+
+Bearing that in mind, the GSO library enables DPDK applications to segment
+packets in software. Note however, that GSO is implemented as a standalone
+library, and not via a 'fallback' mechanism (i.e. for when TSO is unsupported
+in the underlying hardware); that is, applications must explicitly invoke the
+GSO library to segment packets. The size of GSO segments ``(segsz)`` is
+configurable by the application.
+
+Limitations
+---
+
+#. The GSO library doesn't check if input packets have correct checksums.
+
+#. In addition, the GSO library doesn't re-calculate checksums for segmented
+   packets (that task is left to the application).
+
+#. IP fragments are unsupported by the GSO library.
+
+#. The egress interface's driver must support multi-segment packets.
+
+#. Currently, the GSO library supports the following IPv4 packet types:
+
+ - TCP
+ - VxLAN
+ - G

[dpdk-dev] [PATCH v4 1/2] doc: add generic compilation doc for all sample apps

2017-10-05 Thread Marko Kovacevic
From: Herakliusz Lipiec 

Moved duplicated, and occasionally outdated, doc sections from each
of the sample app guides chapters to a common chapter at the start.

This reduces the duplication in the docs and provides a single
point of reference for compiling the sample apps.

Fixes: d0dff9ba445e ("doc: sample application user guide")
Fixes: 60643134c1c1 ("doc: add distributor application")
Fixes: bda68ab9d1e7 ("examples/ethtool: add user-space ethtool sample 
application")
Fixes: d299106e8e31 ("examples/ipsec-secgw: add IPsec sample application")
Fixes: e64833f2273a ("examples/l2fwd-keepalive: add sample application")
Fixes: d0dff9ba445e ("doc: sample application user guide")
Fixes: f6baccbc2b3b ("examples/l2fwd-cat: add sample application for PQoS CAT 
and CDP")
Fixes: ba7b86b1419b ("doc: add l2fwd-crypto sample app guide")
Fixes: ccefe752cab0 ("doc: add jobstats sample guide")
Fixes: 1b2038b06fae ("doc: new packet ordering app description")
Fixes: 4d1a771bd88d ("doc: add guide for performance-thread example")
Fixes: 2d1232571112 ("doc: add PTP client sample guide")
Fixes: 0d8d3df6b81b ("doc: add Rx and Tx callbacks sample app user guide")
Fixes: eb21185d6f21 ("doc: add flow distributor example guide")
Fixes: 1443da3bbd71 ("doc: add basic forwarding skeleton user guide")
Fixes: 181654b7162e ("doc: add a VXLAN sample guide")
Fixes: c75f4e6a7a2b ("doc: add vm power mgmt app")

Signed-off-by: Herakliusz Lipiec 
Signed-off-by: Marko Kovacevic 

---
v3:
   -removed unnecessary addition of export RTE_TARGET (Thomas)
   -changed make install to make config   (Thomas)
   -Inserted new method to export and make examples   (Thomas)
---
 doc/guides/sample_app_ug/cmd_line.rst  |  21 +---
 doc/guides/sample_app_ug/compiling.rst | 135 +
 doc/guides/sample_app_ug/dist_app.rst  |  22 +---
 doc/guides/sample_app_ug/ethtool.rst   |  23 +---
 doc/guides/sample_app_ug/exception_path.rst|  23 +---
 doc/guides/sample_app_ug/hello_world.rst   |  21 +---
 doc/guides/sample_app_ug/index.rst |   1 +
 doc/guides/sample_app_ug/ip_frag.rst   |  27 +
 doc/guides/sample_app_ug/ip_reassembly.rst |  22 +---
 doc/guides/sample_app_ug/ipsec_secgw.rst   |  19 +--
 doc/guides/sample_app_ug/ipv4_multicast.rst|  33 +
 doc/guides/sample_app_ug/keep_alive.rst|  22 +---
 doc/guides/sample_app_ug/kernel_nic_interface.rst  |  24 +---
 doc/guides/sample_app_ug/l2_forward_cat.rst|  34 ++
 doc/guides/sample_app_ug/l2_forward_crypto.rst |  21 +---
 doc/guides/sample_app_ug/l2_forward_job_stats.rst  |  21 +---
 .../sample_app_ug/l2_forward_real_virtual.rst  |  21 +---
 doc/guides/sample_app_ug/l3_forward.rst|  23 +---
 .../sample_app_ug/l3_forward_access_ctrl.rst   |  23 +---
 doc/guides/sample_app_ug/l3_forward_power_man.rst  |  23 +---
 doc/guides/sample_app_ug/l3_forward_virtual.rst|  29 +
 doc/guides/sample_app_ug/link_status_intr.rst  |  27 +
 doc/guides/sample_app_ug/load_balancer.rst |  19 +--
 doc/guides/sample_app_ug/multi_process.rst |  22 +---
 doc/guides/sample_app_ug/netmap_compatibility.rst  |  25 +---
 doc/guides/sample_app_ug/packet_ordering.rst   |  23 +---
 doc/guides/sample_app_ug/performance_thread.rst|  22 +---
 doc/guides/sample_app_ug/ptpclient.rst |  33 ++---
 doc/guides/sample_app_ug/qos_metering.rst  |  24 +---
 doc/guides/sample_app_ug/qos_scheduler.rst |  21 +---
 doc/guides/sample_app_ug/quota_watermark.rst   |  22 +---
 doc/guides/sample_app_ug/rxtx_callbacks.rst|  25 +---
 doc/guides/sample_app_ug/server_node_efd.rst   |  21 +---
 doc/guides/sample_app_ug/skeleton.rst  |  25 +---
 doc/guides/sample_app_ug/test_pipeline.rst |  20 +--
 doc/guides/sample_app_ug/timer.rst |  21 +---
 doc/guides/sample_app_ug/vhost.rst |  16 +--
 doc/guides/sample_app_ug/vm_power_management.rst   |   9 +-
 doc/guides/sample_app_ug/vmdq_dcb_forwarding.rst   |  19 +--
 39 files changed, 241 insertions(+), 741 deletions(-)
 create mode 100644 doc/guides/sample_app_ug/compiling.rst

diff --git a/doc/guides/sample_app_ug/cmd_line.rst 
b/doc/guides/sample_app_ug/cmd_line.rst
index 36c7971..7ea0dea 100644
--- a/doc/guides/sample_app_ug/cmd_line.rst
+++ b/doc/guides/sample_app_ug/cmd_line.rst
@@ -68,26 +68,9 @@ There are three simple commands:
 Compiling the Application
 -
 
-#.  Go to example directory:
+To compile the sample application see :ref:`sample_app_compilation`
 
-.. code-block:: console
-
-export RTE_SDK=/path/to/rte_sdk
-cd ${RTE_SDK}/examples/cmdline
-
-#.  Set the target (a default target is used if not specified). For example:
-
-.. code-block:: console
-
-export RTE_TARGET=x86_64-native-linuxapp-gcc
-
-Refer to the *DPDK Getting Started Guide*

[dpdk-dev] [PATCH v4 2/2] doc: add new introduction to sample app guides

2017-10-05 Thread Marko Kovacevic
Add new Introduction Section into the sample app guides.

Signed-off-by: Marko Kovacevic 
---
 doc/guides/faq/faq.rst |   2 +-
 doc/guides/sample_app_ug/dist_app.rst  |   2 +
 doc/guides/sample_app_ug/exception_path.rst|   2 +-
 doc/guides/sample_app_ug/hello_world.rst   |   2 +
 doc/guides/sample_app_ug/index.rst |   2 +
 doc/guides/sample_app_ug/intro.rst | 152 +++--
 doc/guides/sample_app_ug/ipsec_secgw.rst   |   2 +
 .../sample_app_ug/l2_forward_real_virtual.rst  |   2 +-
 doc/guides/sample_app_ug/l3_forward.rst|   2 +
 doc/guides/sample_app_ug/multi_process.rst |   2 +-
 doc/guides/sample_app_ug/ptpclient.rst |   1 +
 doc/guides/sample_app_ug/qos_scheduler.rst |   2 +
 doc/guides/sample_app_ug/rxtx_callbacks.rst|   1 +
 doc/guides/sample_app_ug/server_node_efd.rst   |   2 +-
 doc/guides/sample_app_ug/skeleton.rst  |   1 +
 15 files changed, 132 insertions(+), 45 deletions(-)

diff --git a/doc/guides/faq/faq.rst b/doc/guides/faq/faq.rst
index dac8050..da9b484 100644
--- a/doc/guides/faq/faq.rst
+++ b/doc/guides/faq/faq.rst
@@ -221,7 +221,7 @@ I350 has RSS support and 8 queue pairs can be used in RSS 
mode. It should work w
 How can hugepage-backed memory be shared among multiple processes?
 --
 
-See the Primary and Secondary examples in the :ref:`multi-process sample 
application `.
+See the Primary and Secondary examples in the :ref:`multi-process sample 
application `.
 
 
 Why can't my application receive packets on my system with UEFI Secure Boot 
enabled?
diff --git a/doc/guides/sample_app_ug/dist_app.rst 
b/doc/guides/sample_app_ug/dist_app.rst
index 466115d..0431b97 100644
--- a/doc/guides/sample_app_ug/dist_app.rst
+++ b/doc/guides/sample_app_ug/dist_app.rst
@@ -28,6 +28,8 @@
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+.. _sample_app_dist_app:
+
 Distributor Sample Application
 ==
 
diff --git a/doc/guides/sample_app_ug/exception_path.rst 
b/doc/guides/sample_app_ug/exception_path.rst
index 2dee8bf..40e5b5c 100644
--- a/doc/guides/sample_app_ug/exception_path.rst
+++ b/doc/guides/sample_app_ug/exception_path.rst
@@ -115,7 +115,7 @@ The following sections provide some explanation of the code.
 Initialization
 ~~
 
-Setup of the mbuf pool, driver and queues is similar to the setup done in the 
:ref:`l2_fwd_app_real_and_virtual`.
+Setup of the mbuf pool, driver and queues is similar to the setup done in the 
:ref:`sample_app_l2_fwd`.
 In addition, the TAP interfaces must also be created.
 A TAP interface is created for each lcore that is being used.
 The code for creating the TAP interface is as follows:
diff --git a/doc/guides/sample_app_ug/hello_world.rst 
b/doc/guides/sample_app_ug/hello_world.rst
index 8196702..8cf23a3 100644
--- a/doc/guides/sample_app_ug/hello_world.rst
+++ b/doc/guides/sample_app_ug/hello_world.rst
@@ -28,6 +28,8 @@
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+.. _sample_app_hello_world:
+
 Hello World Sample Application
 ==
 
diff --git a/doc/guides/sample_app_ug/index.rst 
b/doc/guides/sample_app_ug/index.rst
index 4f8340a..163b468 100644
--- a/doc/guides/sample_app_ug/index.rst
+++ b/doc/guides/sample_app_ug/index.rst
@@ -1,4 +1,5 @@
 ..  BSD LICENSE
+
 Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 All rights reserved.
 
@@ -28,6 +29,7 @@
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+
 Sample Applications User Guides
 ===
 
diff --git a/doc/guides/sample_app_ug/intro.rst 
b/doc/guides/sample_app_ug/intro.rst
index d3f261b..b276714 100644
--- a/doc/guides/sample_app_ug/intro.rst
+++ b/doc/guides/sample_app_ug/intro.rst
@@ -1,5 +1,5 @@
 ..  BSD LICENSE
-Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -28,42 +28,114 @@
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-Introduction
-
-
-This document describes the sample applications that are included in the Data 
Plane Development Kit (DPDK).
-Each chapter describes a sample application that showcases specific 
functionality and
-provides instructions on how to compile, run and use the sample application.
-
-Documentation Roadmap
-

Re: [dpdk-dev] [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy

2017-10-05 Thread Li, Xiaoyun


> -Original Message-
> From: Ananyev, Konstantin
> Sent: Thursday, October 5, 2017 17:37
> To: Li, Xiaoyun ; Richardson, Bruce
> 
> Cc: Lu, Wenzhuo ; Zhang, Helin
> ; dev@dpdk.org; Thomas Monjalon
> (thomas.monja...@6wind.com) 
> Subject: RE: [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy
> 
> > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > index 8c08b8d..15a2fe9 100644
> > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > @@ -241,5 +241,6 @@ EXPERIMENTAL {
> > rte_service_runstate_set;
> > rte_service_set_stats_enable;
> > rte_service_start_with_defaults;
> > +   rte_memcpy_ptr;
> >
> >  } DPDK_17.08;
> 
> I am not an expert in DPDK versioning system, But shouldn't we create a
> 17.11 section here?
Should we create a 17.11 section? I am not sure who to ask for.

> Also I think an alphabetical order should be preserved here.
OK.

> Konstantin


Re: [dpdk-dev] [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy

2017-10-05 Thread Richardson, Bruce


> -Original Message-
> From: Li, Xiaoyun
> Sent: Thursday, October 5, 2017 12:19 PM
> To: Ananyev, Konstantin ; Richardson, Bruce
> 
> Cc: Lu, Wenzhuo ; Zhang, Helin
> ; dev@dpdk.org; Thomas Monjalon
> (thomas.monja...@6wind.com) 
> Subject: RE: [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy
> 
> 
> 
> > -Original Message-
> > From: Ananyev, Konstantin
> > Sent: Thursday, October 5, 2017 17:37
> > To: Li, Xiaoyun ; Richardson, Bruce
> > 
> > Cc: Lu, Wenzhuo ; Zhang, Helin
> > ; dev@dpdk.org; Thomas Monjalon
> > (thomas.monja...@6wind.com) 
> > Subject: RE: [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy
> >
> > > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > index 8c08b8d..15a2fe9 100644
> > > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > @@ -241,5 +241,6 @@ EXPERIMENTAL {
> > >   rte_service_runstate_set;
> > >   rte_service_set_stats_enable;
> > >   rte_service_start_with_defaults;
> > > + rte_memcpy_ptr;
> > >
> > >  } DPDK_17.08;
> >
> > I am not an expert in DPDK versioning system, But shouldn't we create
> > a
> > 17.11 section here?
> Should we create a 17.11 section? I am not sure who to ask for.
> 
Any new functions that are public and are added in the 17.11 release need to
be added to the map file in a new 17.11 section. They are not part of the
ABI for the 17.08 release as they were not present there.



Re: [dpdk-dev] [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy

2017-10-05 Thread Li, Xiaoyun
Another thing, if add 17.11, the end is 17.08 or EXPERIMENTAL?

Best Regards,
Xiaoyun Li



> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Li, Xiaoyun
> Sent: Thursday, October 5, 2017 19:19
> To: Ananyev, Konstantin ; Richardson,
> Bruce 
> Cc: Lu, Wenzhuo ; Zhang, Helin
> ; dev@dpdk.org; Thomas Monjalon
> (thomas.monja...@6wind.com) 
> Subject: Re: [dpdk-dev] [PATCH v6 1/3] eal/x86: run-time dispatch over
> memcpy
> 
> 
> 
> > -Original Message-
> > From: Ananyev, Konstantin
> > Sent: Thursday, October 5, 2017 17:37
> > To: Li, Xiaoyun ; Richardson, Bruce
> > 
> > Cc: Lu, Wenzhuo ; Zhang, Helin
> > ; dev@dpdk.org; Thomas Monjalon
> > (thomas.monja...@6wind.com) 
> > Subject: RE: [PATCH v6 1/3] eal/x86: run-time dispatch over memcpy
> >
> > > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > index 8c08b8d..15a2fe9 100644
> > > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > @@ -241,5 +241,6 @@ EXPERIMENTAL {
> > >   rte_service_runstate_set;
> > >   rte_service_set_stats_enable;
> > >   rte_service_start_with_defaults;
> > > + rte_memcpy_ptr;
> > >
> > >  } DPDK_17.08;
> >
> > I am not an expert in DPDK versioning system, But shouldn't we create
> > a
> > 17.11 section here?
> Should we create a 17.11 section? I am not sure who to ask for.
> 
> > Also I think an alphabetical order should be preserved here.
> OK.
> 
> > Konstantin


Re: [dpdk-dev] [PATCH v4 0/7] new mlx4 datapath bypassing ibverbs

2017-10-05 Thread Adrien Mazarguil
On Thu, Oct 05, 2017 at 09:33:05AM +, Ophir Munk wrote:
> v4 (Ophir):
> - Split "net/mlx4: restore Rx scatter support" commit from "net/mlx4: 
>   restore full Rx support bypassing Verbs" commit
> 
> v3 (Adrien):
> - Drop a few unrelated or unnecessary changes such as the removal of
>   MLX4_PMD_TX_MP_CACHE.
> - Move device checksum support detection code to its previous location.
> - Fix include guard in mlx4_prm.h.
> - Reorder #includes alphabetically.
> - Replace MLX4_TRANSPOSE() macro with documented inline function.
> - Remove extra spaces and blank lines.
> - Use uint8_t * instead of char * for buffers.
> - Replace mlx4_get_cqe() macro with a documented inline function.
> - Replace several unsigned int with uint32_t.
> - Add consistency to field names (sge_n => sges_n).
> - Make mbuf size checks in RX queue setup function similar to mlx5.
> - Update various comments.
> - Fix indentation.
> - Replace run-time endian conversion with static ones where possible.
> - Reorder fields in struct rxq and struct txq for consistency, remove
>   one level of unnecessary inner structures.
> - Fix memory leak on Tx bounce buffer.
> - Update commit logs.
> - Fix remaining checkpatch warnings.
> 
> v2 (Matan):
> Rearrange patches.
> Semantics.
> Enhancements.
> Fix compilation issues.
> 
> Moti Haimovsky (6):
>   net/mlx4: add simple Tx bypassing Verbs
>   net/mlx4: restore full Rx support bypassing Verbs
>   net/mlx4: restore Tx gather support
>   net/mlx4: restore Tx checksum offloads
>   net/mlx4: restore Rx offloads
>   net/mlx4: add loopback Tx from VF
> 
> Ophir Munk (1):
>   net/mlx4: restore Rx scatter support

Thanks Ophir for merging both v3's.

Ferruh, v4 supersedes all prior revisions (Moti's v1, Matan's v2, my own v3
and Ophir's v3-bis, I can't update patchwork for all of them).

For the entire series:

Acked-by: Adrien Mazarguil 

-- 
Adrien Mazarguil
6WIND


Re: [dpdk-dev] [PATCH v8 1/7] mbuf: support GTP in software packet type parser

2017-10-05 Thread Sean Harte
On 5 October 2017 at 09:14, Beilei Xing  wrote:
> Add support of GTP-C and GTP-U tunnels in rte_net_get_ptype().
>
> Signed-off-by: Beilei Xing 
> Acked-by: Olivier Matz 
> ---
>  lib/librte_mbuf/rte_mbuf_ptype.c |  2 ++
>  lib/librte_mbuf/rte_mbuf_ptype.h | 32 
>  2 files changed, 34 insertions(+)
>
> diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c 
> b/lib/librte_mbuf/rte_mbuf_ptype.c
> index e5c4fae..a450814 100644
> --- a/lib/librte_mbuf/rte_mbuf_ptype.c
> +++ b/lib/librte_mbuf/rte_mbuf_ptype.c
> @@ -89,6 +89,8 @@ const char *rte_get_ptype_tunnel_name(uint32_t ptype)
> case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE";
> case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE";
> case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT";
> +   case RTE_PTYPE_TUNNEL_GTPC: return "TUNNEL_GTPC";
> +   case RTE_PTYPE_TUNNEL_GTPU: return "TUNNEL_GTPU";
> default: return "TUNNEL_UNKNOWN";
> }
>  }
> diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h 
> b/lib/librte_mbuf/rte_mbuf_ptype.h
> index acd70bb..978c4a2 100644
> --- a/lib/librte_mbuf/rte_mbuf_ptype.h
> +++ b/lib/librte_mbuf/rte_mbuf_ptype.h
> @@ -383,6 +383,38 @@ extern "C" {
>   */
>  #define RTE_PTYPE_TUNNEL_GRENAT 0x6000
>  /**
> + * GTP-C (GPRS Tunnelling Protocol) control tunneling packet type.
> + * Packet format:
> + * <'ether type'=0x0800
> + * | 'version'=4, 'protocol'=17
> + * | 'destination port'=2123>
> + * or,
> + * <'ether type'=0x86DD
> + * | 'version'=6, 'next header'=17
> + * | 'destination port'=2123>
> + * or,
> + * <'ether type'=0x0800
> + * | 'version'=4, 'protocol'=17
> + * | 'source port'=2123>
> + * or,
> + * <'ether type'=0x86DD
> + * | 'version'=6, 'next header'=17
> + * | 'source port'=2123>
> + */
> +#define RTE_PTYPE_TUNNEL_GTPC   0x7000
> +/**
> + * GTP-U (GPRS Tunnelling Protocol) user data tunneling packet type.
> + * Packet format:
> + * <'ether type'=0x0800
> + * | 'version'=4, 'protocol'=17
> + * | 'destination port'=2152>
> + * or,
> + * <'ether type'=0x86DD
> + * | 'version'=6, 'next header'=17
> + * | 'destination port'=2152>
> + */
> +#define RTE_PTYPE_TUNNEL_GTPU   0x8000
> +/**
>   * Mask of tunneling packet types.
>   */
>  #define RTE_PTYPE_TUNNEL_MASK   0xf000
> --
> 2.5.5
>

Reviewed-by: Seán Harte 


Re: [dpdk-dev] [PATCH v8 3/7] ethdev: add GTP items to support flow API

2017-10-05 Thread Sean Harte
On 5 October 2017 at 09:14, Beilei Xing  wrote:
> This patch adds GTP, GTPC and GTPU items for
> generic flow API, and also exposes item fields
> through the flow command.
>
> Signed-off-by: Beilei Xing 
> Acked-by: Adrien Mazarguil 
> Acked-by: Jingjing Wu 
> ---
>  app/test-pmd/cmdline_flow.c | 40 ++
>  app/test-pmd/config.c   |  3 ++
>  doc/guides/prog_guide/rte_flow.rst  | 17 ++
>  doc/guides/testpmd_app_ug/testpmd_funcs.rst |  4 +++
>  lib/librte_ether/rte_flow.h | 52 
> +
>  5 files changed, 116 insertions(+)
>
> diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
> index a17a004..26c3e4f 100644
> --- a/app/test-pmd/cmdline_flow.c
> +++ b/app/test-pmd/cmdline_flow.c
> @@ -171,6 +171,10 @@ enum index {
> ITEM_GRE_PROTO,
> ITEM_FUZZY,
> ITEM_FUZZY_THRESH,
> +   ITEM_GTP,
> +   ITEM_GTP_TEID,
> +   ITEM_GTPC,
> +   ITEM_GTPU,
>
> /* Validate/create actions. */
> ACTIONS,
> @@ -451,6 +455,9 @@ static const enum index next_item[] = {
> ITEM_MPLS,
> ITEM_GRE,
> ITEM_FUZZY,
> +   ITEM_GTP,
> +   ITEM_GTPC,
> +   ITEM_GTPU,
> ZERO,
>  };
>
> @@ -588,6 +595,12 @@ static const enum index item_gre[] = {
> ZERO,
>  };
>
> +static const enum index item_gtp[] = {
> +   ITEM_GTP_TEID,
> +   ITEM_NEXT,
> +   ZERO,
> +};
> +
>  static const enum index next_action[] = {
> ACTION_END,
> ACTION_VOID,
> @@ -1421,6 +1434,33 @@ static const struct token token_list[] = {
> .args = ARGS(ARGS_ENTRY(struct rte_flow_item_fuzzy,
> thresh)),
> },
> +   [ITEM_GTP] = {
> +   .name = "gtp",
> +   .help = "match GTP header",
> +   .priv = PRIV_ITEM(GTP, sizeof(struct rte_flow_item_gtp)),
> +   .next = NEXT(item_gtp),
> +   .call = parse_vc,
> +   },
> +   [ITEM_GTP_TEID] = {
> +   .name = "teid",
> +   .help = "tunnel endpoint identifier",
> +   .next = NEXT(item_gtp, NEXT_ENTRY(UNSIGNED), item_param),
> +   .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_gtp, teid)),
> +   },
> +   [ITEM_GTPC] = {
> +   .name = "gtpc",
> +   .help = "match GTP header",
> +   .priv = PRIV_ITEM(GTPC, sizeof(struct rte_flow_item_gtp)),
> +   .next = NEXT(item_gtp),
> +   .call = parse_vc,
> +   },
> +   [ITEM_GTPU] = {
> +   .name = "gtpu",
> +   .help = "match GTP header",
> +   .priv = PRIV_ITEM(GTPU, sizeof(struct rte_flow_item_gtp)),
> +   .next = NEXT(item_gtp),
> +   .call = parse_vc,
> +   },
>
> /* Validate/create actions. */
> [ACTIONS] = {
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index 60a8d07..4ec8f0d 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -952,6 +952,9 @@ static const struct {
> MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)),
> MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)),
> MK_FLOW_ITEM(FUZZY, sizeof(struct rte_flow_item_fuzzy)),
> +   MK_FLOW_ITEM(GTP, sizeof(struct rte_flow_item_gtp)),
> +   MK_FLOW_ITEM(GTPC, sizeof(struct rte_flow_item_gtp)),
> +   MK_FLOW_ITEM(GTPU, sizeof(struct rte_flow_item_gtp)),
>  };
>
>  /** Compute storage space needed by item specification. */
> diff --git a/doc/guides/prog_guide/rte_flow.rst 
> b/doc/guides/prog_guide/rte_flow.rst
> index 662a912..73f12ee 100644
> --- a/doc/guides/prog_guide/rte_flow.rst
> +++ b/doc/guides/prog_guide/rte_flow.rst
> @@ -955,6 +955,23 @@ Usage example, fuzzy match a TCPv4 packets:
> | 4 | END  |
> +---+--+
>
> +Item: ``GTP``, ``GTPC``, ``GTPU``
> +^
> +
> +Matches a GTPv1 header.
> +
> +Note: GTP, GTPC and GTPU use the same structure. GTPC and GTPU item
> +are defined for a user-friendly API when creating GTP-C and GTP-U
> +flow rules.
> +
> +- ``v_pt_rsv_flags``: version (3b), protocol type (1b), reserved (1b),
> +  extension header flag (1b), sequence number flag (1b), N-PDU number
> +  flag (1b).
> +- ``msg_type``: message type.
> +- ``msg_len``: message length.
> +- ``teid``: tunnel endpoint identifier.
> +- Default ``mask`` matches teid only.
> +
>  Actions
>  ~~~
>
> diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst 
> b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> index aeef3e1..32223ca 100644
> --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> @@ -2721,6 +2721,10 @@ This section lists supported pattern items and their 
> attributes, if any.
>
>- ``thresh {unsigned}``: accuracy threshold.
>
> +- ``gtp``, ``gtpc``, ``gtpu``: match GTPv1 he

  1   2   3   4   >