[dpdk-dev] [PATCH v1] Return ENOMEM during mpipe_devinit failure

2015-08-20 Thread Tetsuya Mukawa
On 2015/08/20 5:00, Ravi Kerur wrote:
> In function rte_pmd_mpipe_devinit, if rte_eth_dev_allocate
> fails return error which is inline with other drivers.
>
> Signed-off-by: Ravi Kerur 
> ---
>  drivers/net/mpipe/mpipe_tilegx.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/net/mpipe/mpipe_tilegx.c 
> b/drivers/net/mpipe/mpipe_tilegx.c
> index 743feef..6e3e304 100644
> --- a/drivers/net/mpipe/mpipe_tilegx.c
> +++ b/drivers/net/mpipe/mpipe_tilegx.c
> @@ -1582,6 +1582,7 @@ rte_pmd_mpipe_devinit(const char *ifname,
>   if (!eth_dev) {
>   RTE_LOG(ERR, PMD, "%s: Failed to allocate device.\n", ifname);
>   rte_free(priv);
> + return -ENOMEM;
>   }
>  
>   RTE_LOG(INFO, PMD, "%s: Initialized mpipe device"
Acked-by: Tetsuya Mukawa 


[dpdk-dev] [PATCH v2] Change rte_eal_vdev_init to update port_id

2015-08-20 Thread Tetsuya Mukawa
On 2015/08/20 4:42, Ravi Kerur wrote:
> v2:
>> Remove rte_pmd_mpipe_devinit changes
>> Use rte_eal_compare_pci_addr for address comparison
>> Use dpdk_2.2 in version map file for new functions
>
> v1:
> Changes include
>> Modify rte_eal_vdev_init to return allocated port_id
>> Modify rte_eal_probe_one to return allocated port_id
>
> 2. Removed following functions
>> rte_eth_dev_save and
>> rte_eth_dev_get_changed_port
>
> 3. Added 2 new functions
>> rte_eth_dev_get_port_by_name
>> rte_eth_dev_get_port_by_addr
>
> 4. Fix return error(ENOMEM) in function rte_pmd_mpipe_devinit
>
> Compiled on Linux for following targets
>> x86_64-native-linuxapp-gcc
>> x86_64-native-linuxapp-clang
>> x86_x32-native-linuxapp-gcc
>
> Compiled on FreeBSD for following targets
>> x86_64-native-bsdapp-clang
>> x86_64-native-bsdapp-gcc
>
> Tested on Linux/FreeBSD:
>> port attach eth_null
>> port start all
>> port stop all
>> port close all
>> port detach 0
>> port attach eth_null
>> port start all
>> port stop all
>> port close all
>> port detach 0
>
> Successful run of checkpatch.pl on the diffs
>
> Successful validate_abi on Linux for following targets
>
>> x86_64-native-linuxapp-gcc
>> x86_64-native-linuxapp-clang
>
> Signed-off-by: Ravi Kerur 
> ---
>  drivers/net/enic/enic_ethdev.c  |   2 +-
>  lib/librte_eal/common/eal_common_dev.c  |  13 ++--
>  lib/librte_eal/common/eal_common_pci.c  |   6 +-
>  lib/librte_eal/common/include/rte_dev.h |  36 +-
>  lib/librte_eal/common/include/rte_pci.h |   4 +-
>  lib/librte_ether/rte_ethdev.c   | 122 
> +---
>  lib/librte_ether/rte_ether_version.map  |   8 +++
>  7 files changed, 125 insertions(+), 66 deletions(-)
>
> diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c
> index 8280cea..472ef5a 100644
> --- a/drivers/net/enic/enic_ethdev.c
> +++ b/drivers/net/enic/enic_ethdev.c
> @@ -36,8 +36,8 @@
>  #include 
>  #include 
>  
> -#include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> diff --git a/lib/librte_eal/common/eal_common_dev.c 
> b/lib/librte_eal/common/eal_common_dev.c
> index 4089d66..ffdb3b5 100644
> --- a/lib/librte_eal/common/eal_common_dev.c
> +++ b/lib/librte_eal/common/eal_common_dev.c
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -64,7 +65,7 @@ rte_eal_driver_unregister(struct rte_driver *driver)
>  }
>  
>  int
> -rte_eal_vdev_init(const char *name, const char *args)
> +rte_eal_vdev_init(const char *name, const char *args, uint8_t *port_id)
>  {
>   struct rte_driver *driver;
>  
> @@ -81,8 +82,12 @@ rte_eal_vdev_init(const char *name, const char *args)
>* will be "eth_pcap", but "name" will be "eth_pcapN".
>* So use strncmp to compare.
>*/
> - if (!strncmp(driver->name, name, strlen(driver->name)))
> - return driver->init(name, args);
> + if (!strncmp(driver->name, name, strlen(driver->name))) {
> + if (!driver->init(name, args))
> + return rte_eth_dev_get_port_by_name(
> + name, port_id);
> + }
> +

Please remove needless line.

>   }
>  
>   RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
> @@ -108,7 +113,7 @@ rte_eal_dev_init(void)
>   continue;
>  
>   if (rte_eal_vdev_init(devargs->virtual.drv_name,
> - devargs->args)) {
> + devargs->args, NULL)) {
>   RTE_LOG(ERR, EAL, "failed to initialize %s device\n",
>   devargs->virtual.drv_name);
>   return -1;
> diff --git a/lib/librte_eal/common/eal_common_pci.c 
> b/lib/librte_eal/common/eal_common_pci.c
> index 16e8629..3d97892 100644
> --- a/lib/librte_eal/common/eal_common_pci.c
> +++ b/lib/librte_eal/common/eal_common_pci.c
> @@ -79,6 +79,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "eal_private.h"
>  
> @@ -322,7 +323,7 @@ pci_detach_all_drivers(struct rte_pci_device *dev)
>   * the driver of the devive.
>   */
>  int
> -rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
> +rte_eal_pci_probe_one(const struct rte_pci_addr *addr, uint8_t *port_id)
>  {
>   struct rte_pci_device *dev = NULL;
>   int ret = 0;
> @@ -337,7 +338,8 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
>   ret = pci_probe_all_drivers(dev);
>   if (ret < 0)
>   goto err_return;
> - return 0;
> +
> + return rte_eth_dev_get_port_by_addr(addr, port_id);
>   }
>   return -1;
>  
> diff --git a/lib/librte_eal/common/include/rte_dev.h 
> b/lib/librte_eal/common/include/rte_dev.h
> index f601d21..56
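
For illustration only (not part of the patch): with the new
rte_eal_vdev_init(name, args, port_id) prototype shown above, a caller can
learn which ethdev port its virtual device was allocated, since on success the
function now returns the result of rte_eth_dev_get_port_by_name(). A minimal
sketch; the device name and the wrapper function are assumptions:

#include <stdint.h>
#include <rte_dev.h>

/* Create an eth_null virtual device and report its port id via *port_id. */
static int
attach_null_port(uint8_t *port_id)
{
        return rte_eal_vdev_init("eth_null0", "", port_id);
}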

[dpdk-dev] [PATCH v1] Return ENOMEM during mpipe_devinit failure

2015-08-20 Thread Tony Lu
>-Original Message-
>From: Ravi Kerur [mailto:rkerur at gmail.com]
>Sent: Thursday, August 20, 2015 4:00 AM
>To: dev at dpdk.org
>Cc: zlu at ezchip.com; Ravi Kerur
>Subject: [PATCH v1] Return ENOMEM during mpipe_devinit failure
>
>In function rte_pmd_mpipe_devinit, if rte_eth_dev_allocate
>fails return error which is inline with other drivers.
>
>Signed-off-by: Ravi Kerur 
>---
> drivers/net/mpipe/mpipe_tilegx.c | 1 +
> 1 file changed, 1 insertion(+)
>
>diff --git a/drivers/net/mpipe/mpipe_tilegx.c
>b/drivers/net/mpipe/mpipe_tilegx.c
>index 743feef..6e3e304 100644
>--- a/drivers/net/mpipe/mpipe_tilegx.c
>+++ b/drivers/net/mpipe/mpipe_tilegx.c
>@@ -1582,6 +1582,7 @@ rte_pmd_mpipe_devinit(const char *ifname,
>   if (!eth_dev) {
>   RTE_LOG(ERR, PMD, "%s: Failed to allocate device.\n",
ifname);
>   rte_free(priv);
>+  return -ENOMEM;
>   }
>
>   RTE_LOG(INFO, PMD, "%s: Initialized mpipe device"
>--
>1.9.1

Thanks for fixing this.
Acked-by: Zhigang Lu 



[dpdk-dev] [PATCH] vhost: fix qemu shutdown issue

2015-08-20 Thread Ouyang Changchun
This patch originates from the patch:
[dpdk-dev] [PATCH 1/2] Patch for Qemu wrapper for US-VHost to ensure Qemu 
process ends when VM is shutdown
http://dpdk.org/ml/archives/dev/2014-June/003606.html

Also update the vhost sample guide doc.

Signed-off-by: Claire Murphy 
Signed-off-by: Changchun Ouyang 
---
 doc/guides/sample_app_ug/vhost.rst|  9 -
 lib/librte_vhost/libvirt/qemu-wrap.py | 29 +
 2 files changed, 25 insertions(+), 13 deletions(-)
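
For illustration only (the actual change is the diff below): the wrapper now
starts QEMU in its own process group and forwards termination signals to that
group, so killing the qemu-wrap.py process also ends the QEMU it spawned. The
same pattern expressed with plain POSIX calls, as an editor's sketch:

#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static pid_t child_pgid;

/* Forward a termination request to the whole child process group. */
static void
forward_signal(int signum)
{
        (void)signum;
        kill(-child_pgid, SIGTERM);
}

/* Run a command in its own process group and wait for it, killing the whole
 * group if the wrapper itself is asked to terminate. */
static int
run_in_own_group(char *const argv[])
{
        pid_t pid = fork();

        if (pid == 0) {                 /* child */
                setsid();               /* new session => new process group */
                execvp(argv[0], argv);
                _exit(127);
        }
        if (pid < 0)
                return -1;

        child_pgid = pid;               /* the group id equals the child's pid */
        signal(SIGTERM, forward_signal);
        signal(SIGINT, forward_signal);
        signal(SIGHUP, forward_signal);

        return waitpid(pid, NULL, 0) == pid ? 0 : -1;
}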

diff --git a/doc/guides/sample_app_ug/vhost.rst 
b/doc/guides/sample_app_ug/vhost.rst
index 730b9da..743908d 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -717,15 +717,6 @@ Common Issues
 needs access to the shared memory from the guest to receive and transmit 
packets. It is important to make sure
 the QEMU version supports shared memory mapping.

-*   Issues with ``virsh destroy`` not destroying the VM:
-
-Using libvirt ``virsh create`` the ``qemu-wrap.py`` spawns a new process 
to run ``qemu-kvm``. This impacts the behavior
-of ``virsh destroy`` which kills the process running ``qemu-wrap.py`` 
without actually destroying the VM (it leaves
-the ``qemu-kvm`` process running):
-
-This following patch should fix this issue:
-http://dpdk.org/ml/archives/dev/2014-June/003607.html
-
 *   In an Ubuntu environment, QEMU fails to start a new guest normally with 
user space VHOST due to not being able
 to allocate huge pages for the new guest:

diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py 
b/lib/librte_vhost/libvirt/qemu-wrap.py
index 5096011..30a0d50 100755
--- a/lib/librte_vhost/libvirt/qemu-wrap.py
+++ b/lib/librte_vhost/libvirt/qemu-wrap.py
@@ -76,6 +76,7 @@
 #"/dev/ptmx", "/dev/kvm", "/dev/kqemu",
 #"/dev/rtc", "/dev/hpet", "/dev/net/tun",
 #"/dev/-",
+#"/dev/hugepages",
 #]
 #
 #   4.b) Disable SELinux or set to permissive mode
@@ -161,6 +162,8 @@ hugetlbfs_dir = ""
 #

 import sys, os, subprocess
+import time
+import signal


 #List of open userspace vhost file descriptors
@@ -174,6 +177,18 @@ vhost_flags = [ "csum=off",
 "guest_ecn=off"
   ]

+#String of the path to the Qemu process pid
+qemu_pid = "/tmp/%d-qemu.pid" % os.getpid()
+
+#
+# Signal haldler to kill Qemu subprocess
+#
+def kill_qemu_process(signum, stack):
+pidfile = open(qemu_pid, 'r')
+pid = int(pidfile.read())
+os.killpg(pid, signal.SIGTERM)
+pidfile.close()
+

 #
 # Find the system hugefile mount point.
@@ -280,7 +295,7 @@ def main():
 while (num < num_cmd_args):
 arg = sys.argv[num]

-   #Check netdev +1 parameter for vhostfd
+   #Check netdev +1 parameter for vhostfd
 if arg == '-netdev':
 num_vhost_devs = len(fd_list)
 new_args.append(arg)
@@ -333,7 +348,6 @@ def main():
 emul_call += mp
 emul_call += " "

-
 #add user options
 for opt in emul_opts_user:
 emul_call += opt
@@ -353,14 +367,21 @@ def main():
 emul_call+=str(arg)
 emul_call+= " "

+emul_call += "-pidfile %s " % qemu_pid
 #Call QEMU
-subprocess.call(emul_call, shell=True)
+process = subprocess.Popen(emul_call, shell=True, preexec_fn=os.setsid)
+
+for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP, signal.SIGQUIT]:
+signal.signal(sig, kill_qemu_process)

+process.wait()

 #Close usvhost files
 for fd in fd_list:
 os.close(fd)
-
+#Cleanup temporary files
+if os.access(qemu_pid, os.F_OK):
+os.remove(qemu_pid)

 if __name__ == "__main__":
 main()
-- 
1.8.4.2



[dpdk-dev] [PATCH v2] kni: Use utsrelease.h to determine Ubuntu kernel version

2015-08-20 Thread Simon Kagstrom
/proc/version_signature holds the kernel version of the host machine, but
in e.g. chroots this does not necessarily match the kernel that DPDK is
built for. DPDK will then build for the wrong kernel version - that of the
server, and not the one installed in the (build) chroot.

The patch uses utsrelease.h from the kernel sources instead and fakes
the upload version.

Tested on a server with Ubuntu 12.04, building in a chroot for Ubuntu
14.04.

Signed-off-by: Simon Kagstrom 
Signed-off-by: Johan Faltstrom 
---
ChangeLog:

v2: Improve description and motivation for the patch.

 lib/librte_eal/linuxapp/kni/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/linuxapp/kni/Makefile 
b/lib/librte_eal/linuxapp/kni/Makefile
index fb673d9..ac99d3f 100644
--- a/lib/librte_eal/linuxapp/kni/Makefile
+++ b/lib/librte_eal/linuxapp/kni/Makefile
@@ -44,10 +44,10 @@ MODULE_CFLAGS += -I$(RTE_OUTPUT)/include 
-I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/e
 MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
 MODULE_CFLAGS += -Wall -Werror

-ifeq ($(shell test -f /proc/version_signature && lsb_release -si 
2>/dev/null),Ubuntu)
+ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu)
 MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .)
-UBUNTU_KERNEL_CODE := $(shell cut -d' ' -f2 /proc/version_signature | \
-cut -d'~' -f1 | cut -d- -f1,2 | tr .- $(comma))
+UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE 
$(RTE_KERNELDIR)/include/generated/utsrelease.h \
+| cut -d '"' -f2 | cut -d- -f1,2 | tr .- $(comma)`,1)
 MODULE_CFLAGS += 
-D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))"
 endif

-- 
1.9.1



[dpdk-dev] flow_director_filter error!!

2015-08-20 Thread Wu, Jingjing
Which NIC are you using? I540? Are you talking about X540?

And does other operations work except flush?

Thanks
Jingjing

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Navneet Rao
> Sent: Thursday, August 20, 2015 3:27 AM
> To: dev at dpdk.org
> Subject: Re: [dpdk-dev] flow_director_filter error!!
> 
> Any suggestions?
> 
> Thanks
> -Navneet
> 
> 
> -Original Message-
> From: Navneet Rao
> Sent: Tuesday, August 18, 2015 4:01 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] flow_director_filter error!!
> 
> Hello:
> 
> 
> 
> Using dpdk-2.0.0 and i540 -
> 
> 
> 
> I am using the testpmd app to test-drive the flow-director filter settings.
> 
> After I start the testpmd app, I am flushing the flow_director_filter settings
> and get the following error -
> 
> 
> 
> testpmd> flush_flow_director 0
> 
> PMD: ixgbe_fdir_flush(): Failed to re-initialize FD table.
> 
> flow director table flushing error: (Too many open files in system)
> 
> 
> 
> Any clues or directions?
> 
> 
> 
> Thanks
> 
> -Navneet
> 
> 


[dpdk-dev] [RFC] combining dpdk with ovs via vhost_net

2015-08-20 Thread Zhuangyanying
Hi all:
   AFAIK, nowadays there's only one solution for applying DPDK to Docker
containers, which is passing through a physical NIC to the application.
   I'm now working on another solution that combines DPDK and OVS via
vhost-net; I name it the "vhost_net pmd driver".
   The detailed solution is as follows:
   1. Similar to the qemu<->vhost_net process, we use a series of ioctl
commands to make the virtqueue visible to both vhost_net and the vhost_net pmd
driver.
   2. In KVM guests, the tx/rx queues consist of GPA addresses, which
vhost_net translates into HVA addresses so that the tap device can copy the
datagrams. However, GPA addresses are not necessary for containers to fill the
tx/rx queues. Thus, we instead fill HVA addresses into the tx/rx queues and
pass an (HVA, HVA) map table to vhost_net via the VHOST_SET_MEM_TABLE ioctl
during initialization (see the sketch after this list). Thus *the vhost_net
code can stay untouched*.
   3. The packet transmit/receive process is exactly the same as in the virtio
pmd driver.
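
   For illustration only (this sketch is not part of the RFC): one way step 2's
(HVA, HVA) table could be handed to vhost_net, using the vhost_memory structure
and the VHOST_SET_MEM_TABLE ioctl from <linux/vhost.h>. The vhost_fd, shm_base
and shm_size parameters are assumptions for the sketch.

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

/* Register one shared-memory region with vhost_net. The "guest physical"
 * side of the mapping is simply the container's virtual address of the
 * region, so vhost's GPA->HVA translation becomes a fixed offset and the
 * vhost_net code stays untouched. */
static int
set_hva_hva_mem_table(int vhost_fd, void *shm_base, size_t shm_size)
{
        struct vhost_memory *mem;
        int ret;

        mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region));
        if (mem == NULL)
                return -1;

        mem->nregions = 1;
        mem->regions[0].guest_phys_addr = (uintptr_t)shm_base; /* "GPA" == CVA */
        mem->regions[0].userspace_addr  = (uintptr_t)shm_base;
        mem->regions[0].memory_size     = shm_size;

        ret = ioctl(vhost_fd, VHOST_SET_MEM_TABLE, mem);
        free(mem);
        return ret;
}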

   The demo has been worked out already. In the demo, DPDK directly accesses
vhost_net to realize L2 forwarding.
 clients  |  host   |container
  ping| |
vm0   - > |ixgbe:enp131s0f0 <-> ovs:br0  <-> vhost:tap0 |<-> vhost-net pmd
  | | |
  | |  testpmd
  | | |
vm1  <--  |ixgbe:enp131s0f1 <-> ovs:br1  <-> vhost:tap1 |<-> vhost-net pmd
  | |

 I don't know whether this solution is acceptable here. Are there any blueprints
for combining containers with DPDK? Any suggestions or advice? Thanks in advance.


---
Ann


[dpdk-dev] [RFC] combining dpdk with ovs via vhost_net

2015-08-20 Thread Xie, Huawei
Hi Yanping:
I don't quite get your idea. Last year I had a design and POC that enables a
user space virtio interface in a container.
I don't know if it is similar to your proposal. I will post the idea later
in a following mail.

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Zhuangyanying
> Sent: Thursday, August 20, 2015 3:50 PM
> To: dev at dpdk.org
> Cc: gaoxiaoqiu; Zhangbo (Oscar); Zhbzg; Guohongzhen; Zhoujingbin
> Subject: [dpdk-dev] [RFC] combining dpdk with ovs via vhost_net
> 
> Hi all:
>AFAIK, nowadays there's only one solution to apply DPDK into Docker
> Containers, which is Passing-Through physical NIC to applications.
>I'm now working on another solution, considering combining DPDK and
> OVS via vhost-net, I name it "vhost_net pmd driver".
>The detailed solution is as follows:
>1 Similar to the process of qemu<->vhost_net, we use a serial of ioctl
> commands to make virtqueue visible to both vhost_net and vhost_net pmd
> driver.
>2 In kvm guests, the tx/rx queue is consisted of GPA addresses, and the
> vhost_net will transform it into HVA addresses, then the tap device could
> copy datagram afterwards. However,  GPA addresses are not necessary for
> containers to fulfill the tx/rx queue. Thus, we fake it to fulfill the HVA
> addresses into the tx/rx queues, and pass the (HVA, HVA) map table to
> vhost_net by VHOST_SET_MEM_TABLE ioctl during initialization. Thus *the
> vhost_net codes could keep untouched*.
>3 the packet-transceiver-process is totally the same to virtio pmd driver.
> 
>The demo has been worked out already. In the demo, the dpdk could
> directly access vhost_net to realize L2 forward.
>  clients  |  host   |contrainer
>   ping| |
> vm0   - > |ixgbe:enp131s0f0 <-> ovs:br0  <-> vhost:tap0 |<-> vhost-net
> pmd
>   | | |
>   | |  testpmd
>   | | |
> vm1  <--  |ixgbe:enp131s0f1 <-> ovs:br1  <-> vhost:tap1 |<-> vhost-net
> pmd
>   | |
> 
>  I don't know wheter this solution is acceptable here. Any blueprints for
> combining container with dpdk? any suggestions or advices? Thanks in
> advance.
> 
> 
> ---
> Ann


[dpdk-dev] [PATCH v1] ixgbe_pmd: forbid tx_rs_thresh above 1 for all NICs but 82598

2015-08-20 Thread Ananyev, Konstantin
Hi Vlad,

> -Original Message-
> From: Vlad Zolotarov [mailto:vladz at cloudius-systems.com]
> Sent: Wednesday, August 19, 2015 11:03 AM
> To: Ananyev, Konstantin; Lu, Wenzhuo
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v1] ixgbe_pmd: forbid tx_rs_thresh above 1 for 
> all NICs but 82598
> 
> 
> 
> On 08/19/15 10:43, Ananyev, Konstantin wrote:
> > Hi Vlad,
> > Sorry for delay with review, I am OOO till next week.
> > Meanwhile, few questions/comments from me.
> 
> Hi, Konstantin, long time no see... ;)
> 
> >
> >> This patch fixes the Tx hang we were constantly hitting with a
> >> seastar-based
> >> application on x540 NIC.
> > Could you help to share with us how to reproduce the tx hang issue,
> >> with using
> > typical DPDK examples?
>  Sorry. I'm not very familiar with the typical DPDK examples to help u
>  here. However this is quite irrelevant since without this this patch
>  ixgbe PMD obviously abuses the HW spec as has been explained above.
> 
>  We saw the issue when u stressed the xmit path with a lot of highly
>  fragmented TCP frames (packets with up to 33 fragments with non-headers
>  fragments as small as 4 bytes) with all offload features enabled.
> > Could you provide us with the pcap file to reproduce the issue?
> 
> Well, the thing is it takes some time to reproduce it (a few minutes of
> heavy load) therefore a pcap would be quite large.

Probably you can upload it to some place, from which we will be able to 
download it?
Or might be you have some sort of scapy script to generate it?
I suppose we'll need something to reproduce the issue and verify the fix.   

> 
> > My concern with you approach is that it would affect TX performance.
> 
> It certainly will ;) But it seem inevitable. See below.
> 
> > Right now, for simple TX PMD usually reads only (nb_tx_desc/tx_rs_thresh) 
> > TXDs,
> > While with your patch (if I understand it correctly) it has to read all 
> > TXDs in the HW TX ring.
> 
> If by "simple" u refer an always single fragment per Tx packet - then u
> are absolutely correct.
> 
> My initial patch was to only set RS on every EOP descriptor without
> changing the rs_thresh value and this patch worked.
> However HW spec doesn't ensure in a general case that packets are always
> handled/completion write-back completes in the same order the packets
> are placed on the ring (see "Tx arbitration schemes" chapter in 82599
> spec for instance). Therefore AFAIU one should not assume that if
> packet[x+1] DD bit is set then packet[x] is completed too.


[dpdk-dev] [PATCH v1] ixgbe_pmd: forbid tx_rs_thresh above 1 for all NICs but 82598

2015-08-20 Thread Vlad Zolotarov


On 08/20/15 11:41, Ananyev, Konstantin wrote:
> Hi Vlad,
>
>> -Original Message-
>> From: Vlad Zolotarov [mailto:vladz at cloudius-systems.com]
>> Sent: Wednesday, August 19, 2015 11:03 AM
>> To: Ananyev, Konstantin; Lu, Wenzhuo
>> Cc: dev at dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v1] ixgbe_pmd: forbid tx_rs_thresh above 1 
>> for all NICs but 82598
>>
>>
>>
>> On 08/19/15 10:43, Ananyev, Konstantin wrote:
>>> Hi Vlad,
>>> Sorry for delay with review, I am OOO till next week.
>>> Meanwhile, few questions/comments from me.
>> Hi, Konstantin, long time no see... ;)
>>
 This patch fixes the Tx hang we were constantly hitting with a
 seastar-based
 application on x540 NIC.
>>> Could you help to share with us how to reproduce the tx hang issue,
 with using
>>> typical DPDK examples?
>> Sorry. I'm not very familiar with the typical DPDK examples to help u
>> here. However this is quite irrelevant since without this this patch
>> ixgbe PMD obviously abuses the HW spec as has been explained above.
>>
>> We saw the issue when u stressed the xmit path with a lot of highly
>> fragmented TCP frames (packets with up to 33 fragments with non-headers
>> fragments as small as 4 bytes) with all offload features enabled.
>>> Could you provide us with the pcap file to reproduce the issue?
>> Well, the thing is it takes some time to reproduce it (a few minutes of
>> heavy load) therefore a pcap would be quite large.
> Probably you can upload it to some place, from which we will be able to 
> download it?

I'll see what I can do but no promises...

> Or might be you have some sort of scapy script to generate it?
> I suppose we'll need something to reproduce the issue and verify the fix.

Since the original code abuses the HW spec u don't have to... ;)

>
>>> My concern with you approach is that it would affect TX performance.
>> It certainly will ;) But it seem inevitable. See below.
>>
>>> Right now, for simple TX PMD usually reads only (nb_tx_desc/tx_rs_thresh) 
>>> TXDs,
>>> While with your patch (if I understand it correctly) it has to read all 
>>> TXDs in the HW TX ring.
>> If by "simple" u refer an always single fragment per Tx packet - then u
>> are absolutely correct.
>>
>> My initial patch was to only set RS on every EOP descriptor without
>> changing the rs_thresh value and this patch worked.
>> However HW spec doesn't ensure in a general case that packets are always
>> handled/completion write-back completes in the same order the packets
>> are placed on the ring (see "Tx arbitration schemes" chapter in 82599
>> spec for instance). Therefore AFAIU one should not assume that if
>> packet[x+1] DD bit is set then packet[x] is completed too.
>  From my understanding, TX arbitration controls the order in which TXDs from
> different queues are fetched/processed.
> But descriptors from the same TX queue are processed in FIFO order.
> So, I think that  - yes, if TXD[x+1] DD bit is set, then TXD[x] is completed 
> too,
> and setting RS on every EOP TXD should be enough.

Ok. I'll rework the patch under this assumption then.
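
For illustration only, a rough sketch of what "set RS on every EOP descriptor"
could look like when building the per-descriptor command flags. The bit values
below are placeholders standing in for the driver's own EOP/RS definitions;
this is not the actual rework.

#include <stdint.h>

/* Placeholder command bits; the real driver uses its IXGBE_* equivalents. */
#define TXD_CMD_EOP     (1u << 24)
#define TXD_CMD_RS      (1u << 27)

/* Build the command flags for one descriptor of a (possibly multi-segment)
 * packet: Report Status is requested only on the last (EOP) descriptor,
 * i.e. once per packet. */
static inline uint32_t
txd_cmd_flags(int is_last_segment)
{
        uint32_t cmd = 0;

        if (is_last_segment)
                cmd |= TXD_CMD_EOP | TXD_CMD_RS;

        return cmd;
}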

>
>> That's why I changed the patch to be as u see it now. However if I miss
>> something here and your HW people ensure the in-order completion this of
>> course may be changed back.
>>
>>> Even if we really need to setup RS bit in each TXD (I still doubt we really 
>>> do) - ,
>> Well, if u doubt u may ask the guys from the Intel networking division
>> that wrote the 82599 and x540 HW specs where they clearly state that. ;)
> Good point, we'll see what we can do here :)
> Konstantin
>
>>> I think inside PMD it still should be possible to check TX completion in 
>>> chunks.
>>> Konstantin
>>>
>>>
>> Thanks,
>> vlad
 Signed-off-by: Vlad Zolotarov 
 ---
 drivers/net/ixgbe/ixgbe_ethdev.c |  9 +
 drivers/net/ixgbe/ixgbe_rxtx.c   | 23 ++-
 2 files changed, 31 insertions(+), 1 deletion(-)

 diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
 b/drivers/net/ixgbe/ixgbe_ethdev.c
 index b8ee1e9..6714fd9 100644
 --- a/drivers/net/ixgbe/ixgbe_ethdev.c
 +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
 @@ -2414,6 +2414,15 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev,
>> struct
 rte_eth_dev_info *dev_info)
  .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
  ETH_TXQ_FLAGS_NOOFFLOADS,
  };
 +
 +  /*
 +   * According to 82599 and x540 specifications RS bit *must* be
 set on
>> the
 +   * last descriptor of *every* packet. Therefore we will not allow
 the
 +   * tx_rs_thresh above 1 for all NICs newer than 82598.
 +   */
 +  if (hw->mac.type > ixgbe_mac_82598EB)
 +  dev_info->default_txconf.tx_rs_thresh = 1;
 +
  dev_info->hash_key_size = IXGBE_HK

[dpdk-dev] [PATCH v1] ixgbe_pmd: forbid tx_rs_thresh above 1 for all NICs but 82598

2015-08-20 Thread Vlad Zolotarov


On 08/20/15 11:56, Vlad Zolotarov wrote:
>
>
> On 08/20/15 11:41, Ananyev, Konstantin wrote:
>> Hi Vlad,
>>
>>> -Original Message-
>>> From: Vlad Zolotarov [mailto:vladz at cloudius-systems.com]
>>> Sent: Wednesday, August 19, 2015 11:03 AM
>>> To: Ananyev, Konstantin; Lu, Wenzhuo
>>> Cc: dev at dpdk.org
>>> Subject: Re: [dpdk-dev] [PATCH v1] ixgbe_pmd: forbid tx_rs_thresh 
>>> above 1 for all NICs but 82598
>>>
>>>
>>>
>>> On 08/19/15 10:43, Ananyev, Konstantin wrote:
 Hi Vlad,
 Sorry for delay with review, I am OOO till next week.
 Meanwhile, few questions/comments from me.
>>> Hi, Konstantin, long time no see... ;)
>>>
> This patch fixes the Tx hang we were constantly hitting with a
> seastar-based
> application on x540 NIC.
 Could you help to share with us how to reproduce the tx hang 
 issue,
> with using
 typical DPDK examples?
>>> Sorry. I'm not very familiar with the typical DPDK examples to 
>>> help u
>>> here. However this is quite irrelevant since without this this 
>>> patch
>>> ixgbe PMD obviously abuses the HW spec as has been explained above.
>>>
>>> We saw the issue when u stressed the xmit path with a lot of highly
>>> fragmented TCP frames (packets with up to 33 fragments with 
>>> non-headers
>>> fragments as small as 4 bytes) with all offload features enabled.
 Could you provide us with the pcap file to reproduce the issue?
>>> Well, the thing is it takes some time to reproduce it (a few minutes of
>>> heavy load) therefore a pcap would be quite large.
>> Probably you can upload it to some place, from which we will be able 
>> to download it?
>
> I'll see what I can do but no promises...

On a second thought pcap file won't help u much since in order to 
reproduce the issue u have to reproduce exactly the same structure of 
clusters i give to HW and it's not what u see on wire in a TSO case.

>
>> Or might be you have some sort of scapy script to generate it?
>> I suppose we'll need something to reproduce the issue and verify the 
>> fix.
>
> Since the original code abuses the HW spec u don't have to... ;)
>
>>
 My concern with you approach is that it would affect TX performance.
>>> It certainly will ;) But it seem inevitable. See below.
>>>
 Right now, for simple TX PMD usually reads only 
 (nb_tx_desc/tx_rs_thresh) TXDs,
 While with your patch (if I understand it correctly) it has to read 
 all TXDs in the HW TX ring.
>>> If by "simple" u refer an always single fragment per Tx packet - then u
>>> are absolutely correct.
>>>
>>> My initial patch was to only set RS on every EOP descriptor without
>>> changing the rs_thresh value and this patch worked.
>>> However HW spec doesn't ensure in a general case that packets are 
>>> always
>>> handled/completion write-back completes in the same order the packets
>>> are placed on the ring (see "Tx arbitration schemes" chapter in 82599
>>> spec for instance). Therefore AFAIU one should not assume that if
>>> packet[x+1] DD bit is set then packet[x] is completed too.
>>  From my understanding, TX arbitration controls the order in which 
>> TXDs from
>> different queues are fetched/processed.
>> But descriptors from the same TX queue are processed in FIFO order.
>> So, I think that  - yes, if TXD[x+1] DD bit is set, then TXD[x] is 
>> completed too,
>> and setting RS on every EOP TXD should be enough.
>
> Ok. I'll rework the patch under this assumption then.
>
>>
>>> That's why I changed the patch to be as u see it now. However if I miss
>>> something here and your HW people ensure the in-order completion 
>>> this of
>>> course may be changed back.
>>>
 Even if we really need to setup RS bit in each TXD (I still doubt 
 we really do) - ,
>>> Well, if u doubt u may ask the guys from the Intel networking division
>>> that wrote the 82599 and x540 HW specs where they clearly state 
>>> that. ;)
>> Good point, we'll see what we can do here :)
>> Konstantin
>>
 I think inside PMD it still should be possible to check TX 
 completion in chunks.
 Konstantin


>>> Thanks,
>>> vlad
> Signed-off-by: Vlad Zolotarov 
> ---
> drivers/net/ixgbe/ixgbe_ethdev.c |  9 +
> drivers/net/ixgbe/ixgbe_rxtx.c   | 23 ++-
> 2 files changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
> b/drivers/net/ixgbe/ixgbe_ethdev.c
> index b8ee1e9..6714fd9 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -2414,6 +2414,15 @@ ixgbe_dev_info_get(struct rte_eth_dev 
> *dev,
>>> struct
> rte_eth_dev_info *dev_info)
>  .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
> ETH_TXQ_FLAGS_NOOFFLOADS,
>  };
> +
> +  /*
> +   * According to 8

[dpdk-dev] [PATCH v1] ixgbe_pmd: forbid tx_rs_thresh above 1 for all NICs but 82598

2015-08-20 Thread Vlad Zolotarov


On 08/20/15 12:05, Vlad Zolotarov wrote:
>
>
> On 08/20/15 11:56, Vlad Zolotarov wrote:
>>
>>
>> On 08/20/15 11:41, Ananyev, Konstantin wrote:
>>> Hi Vlad,
>>>
 -Original Message-
 From: Vlad Zolotarov [mailto:vladz at cloudius-systems.com]
 Sent: Wednesday, August 19, 2015 11:03 AM
 To: Ananyev, Konstantin; Lu, Wenzhuo
 Cc: dev at dpdk.org
 Subject: Re: [dpdk-dev] [PATCH v1] ixgbe_pmd: forbid tx_rs_thresh 
 above 1 for all NICs but 82598



 On 08/19/15 10:43, Ananyev, Konstantin wrote:
> Hi Vlad,
> Sorry for delay with review, I am OOO till next week.
> Meanwhile, few questions/comments from me.
 Hi, Konstantin, long time no see... ;)

>> This patch fixes the Tx hang we were constantly hitting with a
>> seastar-based
>> application on x540 NIC.
> Could you help to share with us how to reproduce the tx hang 
> issue,
>> with using
> typical DPDK examples?
 Sorry. I'm not very familiar with the typical DPDK examples to 
 help u
 here. However this is quite irrelevant since without this this 
 patch
 ixgbe PMD obviously abuses the HW spec as has been explained 
 above.

 We saw the issue when u stressed the xmit path with a lot of 
 highly
 fragmented TCP frames (packets with up to 33 fragments with 
 non-headers
 fragments as small as 4 bytes) with all offload features enabled.
> Could you provide us with the pcap file to reproduce the issue?
 Well, the thing is it takes some time to reproduce it (a few 
 minutes of
 heavy load) therefore a pcap would be quite large.
>>> Probably you can upload it to some place, from which we will be able 
>>> to download it?
>>
>> I'll see what I can do but no promises...
>
> On a second thought pcap file won't help u much since in order to 
> reproduce the issue u have to reproduce exactly the same structure of 
> clusters i give to HW and it's not what u see on wire in a TSO case.

And not only in a TSO case... ;)

>
>>
>>> Or might be you have some sort of scapy script to generate it?
>>> I suppose we'll need something to reproduce the issue and verify the 
>>> fix.
>>
>> Since the original code abuses the HW spec u don't have to... ;)
>>
>>>
> My concern with you approach is that it would affect TX performance.
 It certainly will ;) But it seem inevitable. See below.

> Right now, for simple TX PMD usually reads only 
> (nb_tx_desc/tx_rs_thresh) TXDs,
> While with your patch (if I understand it correctly) it has to 
> read all TXDs in the HW TX ring.
 If by "simple" u refer an always single fragment per Tx packet - 
 then u
 are absolutely correct.

 My initial patch was to only set RS on every EOP descriptor without
 changing the rs_thresh value and this patch worked.
 However HW spec doesn't ensure in a general case that packets are 
 always
 handled/completion write-back completes in the same order the packets
 are placed on the ring (see "Tx arbitration schemes" chapter in 82599
 spec for instance). Therefore AFAIU one should not assume that if
 packet[x+1] DD bit is set then packet[x] is completed too.
>>>  From my understanding, TX arbitration controls the order in which 
>>> TXDs from
>>> different queues are fetched/processed.
>>> But descriptors from the same TX queue are processed in FIFO order.
>>> So, I think that  - yes, if TXD[x+1] DD bit is set, then TXD[x] is 
>>> completed too,
>>> and setting RS on every EOP TXD should be enough.
>>
>> Ok. I'll rework the patch under this assumption then.
>>
>>>
 That's why I changed the patch to be as u see it now. However if I 
 miss
 something here and your HW people ensure the in-order completion 
 this of
 course may be changed back.

> Even if we really need to setup RS bit in each TXD (I still doubt 
> we really do) - ,
 Well, if u doubt u may ask the guys from the Intel networking division
 that wrote the 82599 and x540 HW specs where they clearly state 
 that. ;)
>>> Good point, we'll see what we can do here :)
>>> Konstantin
>>>
> I think inside PMD it still should be possible to check TX 
> completion in chunks.
> Konstantin
>
>
 Thanks,
 vlad
>> Signed-off-by: Vlad Zolotarov 
>> ---
>> drivers/net/ixgbe/ixgbe_ethdev.c |  9 +
>> drivers/net/ixgbe/ixgbe_rxtx.c   | 23 
>> ++-
>> 2 files changed, 31 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c
>> b/drivers/net/ixgbe/ixgbe_ethdev.c
>> index b8ee1e9..6714fd9 100644
>> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
>> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
>> @@ -2414,6 +2414,15 @@ ixgbe_dev_info_get(struct rte

[dpdk-dev] vhost compliant virtio based networking interface in container

2015-08-20 Thread Xie, Huawei
Added dev at dpdk.org

On 8/20/2015 6:04 PM, Xie, Huawei wrote:
> Yanping:
> I read your mail; it seems what we did is quite similar. Here I wrote a
> quick mail to describe our design. Let me know if it is the same thing.
>
> Problem Statement:
> We don't have a high performance networking interface in containers for
> NFV. The current veth pair based interface can't easily be accelerated.
>
> The key components involved:
> 1. A DPDK based virtio PMD driver in the container.
> 2. A device simulation framework in the container.
> 3. dpdk (or kernel) vhost running in the host.
>
> How is virtio created?
> A:  There is no "real" virtio-pci device in the container environment.
> 1) The host maintains pools of memory and shares memory with the container.
> This could be accomplished by the host sharing a huge page file with the container.
> 2) The container creates virtio rings based on the shared memory.
> 3) The container creates mbuf memory pools on the shared memory.
> 4) The container sends the memory and vring information to vhost through
> vhost messages. This could be done either through ioctl calls or vhost
> user messages.
>
> How vhost message is sent?
> A: There are two alternative ways to do this.
> 1) The customized virtio PMD is responsible for all the vring creation,
> and vhost message sending.
> 2) We could do this through a lightweight device simulation framework.
> The device simulation creates a simple PCI bus. On the PCI bus,
> virtio-net PCI devices are created. The device simulation provides an
> IOAPI for MMIO/IO access.
>    2.1 The virtio PMD configures the pseudo virtio device as it does in
> a KVM guest environment.
>    2.2 Rather than using IO instructions, the virtio PMD uses the IOAPI
> for IO operations on the virtio-net PCI device.
>    2.3 The device simulation is responsible for device state machine
> simulation.
>    2.4 The device simulation is responsible for talking to vhost.
>  With this approach, we could minimize the virtio PMD modifications.
> The virtio PMD is like configuring a real virtio-net PCI device.
>
> Memory mapping?
> A: QEMU can access the whole guest memory in the KVM environment. We need
> to fill that gap.
> The container maps the shared memory into the container's virtual address
> space and the host maps it into the host's virtual address space. There is
> a fixed offset mapping between the two.
> The container creates the shared vrings based on this memory, and also
> creates the mbuf memory pools on the shared memory.
> In the VHOST_SET_MEM_TABLE message, we send the memory mapping
> information for the shared memory. As we require the mbuf pools to be created
> on the shared memory, and buffers are allocated from those pools, dpdk
> vhost can translate the GPA in a vring desc to a host virtual address.
>
>
> GPA or CVA in vring desc?
> To ease the memory translation, rather than using the GPA, here we use the
> CVA (container virtual address). This is the tricky thing here.
> 1) The virtio PMD writes the vring's VFN rather than its PFN to the PFN
> register through the IOAPI.
> 2) The device simulation framework will use the VFN as the PFN.
> 3) The device simulation sends SET_VRING_ADDR with the CVA.
> 4) The virtio PMD fills the vring desc with the CVA of the mbuf data pointer
> rather than the GPA.
> So when the host sees the CVA, it can translate it to an HVA (host virtual
> address).
>
> Worth noting:
> The virtio interface in the container follows the vhost message format, and
> is compliant with the dpdk vhost implementation, i.e., no dpdk vhost
> modification is needed.
> vhost isn't aware of whether the incoming virtio comes from a KVM guest or
> a container.
>
> That pretty much covers the high level design. There are quite a few low
> level issues. For example, a 32-bit PFN is enough for a KVM guest, but since
> we use a 64-bit VFN (virtual page frame number), a trick is done here through
> a special IOAPI.
>
> /huawei
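
For illustration only (an editor's sketch, not part of the design above): the
fixed offset mapping described under "Memory mapping?" means that, once both
sides have mapped the same shared memory, translating a container virtual
address (CVA) found in a vring descriptor into a host virtual address (HVA) is
a single addition. The variable names below are assumptions.

#include <stdint.h>

/* Base addresses at which the same shared-memory region is mapped in the
 * container and in the host vhost process; both are learned during the
 * VHOST_SET_MEM_TABLE exchange. */
static uintptr_t container_map_base;   /* CVA of the region start */
static uintptr_t host_map_base;        /* HVA of the region start */

/* Translate a CVA taken from a vring descriptor into the HVA that the
 * host side can dereference. */
static inline void *
cva_to_hva(uintptr_t cva)
{
        return (void *)(host_map_base + (cva - container_map_base));
}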



[dpdk-dev] Issue observed with execution of Reorder test app

2015-08-20 Thread Mukesh Dua
I see an issue with the reorder test app failing in an x86 environment due to
changes made between releases 2.0.0 and 2.1.0:

App reorder_test (app/test/test_reorder.c)

Function failing: test_reorder_insert

There have been some changes related to the addition of the is_initialized
parameter to the rte_reorder_buffer structure. In parallel, changes were made
to initialize some of the parameters in the rte_reorder_insert function:

rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf)
{
        uint32_t offset, position;
        struct cir_buffer *order_buf = &b->order_buf;

        if (!b->is_initialized) {
                b->min_seqn = mbuf->seqn;
                b->is_initialized = 1;
        }

=> I don't see any reason to set b->min_seqn to mbuf->seqn, and if that has
to be done, the conditional checks in function test_reorder_insert right after
the call to rte_reorder_insert should have been modified. Additionally,
the next seqn numbers being populated should have been changed in function
test_reorder_insert:

        ret = rte_reorder_insert(b, bufs[0]);
        if (!((ret == -1) && (rte_errno == ERANGE))) {
                printf("%s:%d: No error inserting late packet with seqn:"
                        " 3 * size\n", __func__, __LINE__);
                ret = -1;
                goto exit;
        }

for (i = 0; i < num_bufs; i++)
bufs[i]->seqn = i;

On the other hand, changing the code in function rte_reorder_insert:
rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf)
{
        uint32_t offset, position;
        struct cir_buffer *order_buf = &b->order_buf;

        if (!b->is_initialized) {
                b->min_seqn = 0;  //Removed initialization from mbuf->seqn
                b->is_initialized = 1;
        }
fixes the issues and the test case passes.

Regards,
Mukesh


[dpdk-dev] Issue observed with execution of Reorder test app

2015-08-20 Thread Gonzalez Monroy, Sergio
On 20/08/2015 12:38, Mukesh Dua wrote:
> I see issue with reorder test app failing on x86 environment due to changes
> made between release 2.0.0 and 2.1.0:
>
> App reorder_test (app/test/test_reorder.c)
> 
> Function failing: test_reorder_insert
>
> There had been some changes with respect to addition of parameter
> is_initialized to the structure rte_reorder_buffer. In parallel the changes
> were made to initialize some of the parameters in function
> rte_reorder_insert
>
> rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf)
> {
>  uint32_t offset, position;
>  struct cir_buffer *order_buf = &b->order_buf;
>
> *if (!b->is_initialized) {*
> *b->min_seqn = mbuf->seqn;*
>  *b->is_initialized = 1;*
> *}*
>
> => I don't see any reason to set b->min_seqn to mbuf->seqn and if that has
> to be done, the conditional checks should have been modified in function
> test_reorder_insert soon after a call to rte_reorder_insert. Additionally,
> the next seqn number being populated should have been changed in function
> test_reorder_insert:
>
>  ret = rte_reorder_insert(b, bufs[0]);
> *if (!((ret == -1) && (rte_errno == ERANGE))) {*
> *printf("%s:%d: No error inserting late packet with seqn:"*
> *" 3 * size\n", __func__, __LINE__);*
> *ret = -1;*
> *goto exit;*
> *}*
>
>  for (i = 0; i < num_bufs; i++)
>  bufs[i]->seqn = i;
>
> On the other hand, changing the code in function rte_reorder_insert:
> rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf)
> {
>  uint32_t offset, position;
>  struct cir_buffer *order_buf = &b->order_buf;
>
>  if (!b->is_initialized) {
> *b->min_seqn = 0;  //Removed initialization from mbuf->seqn*
>  b->is_initialized = 1;
>  }
> fixes the issues and the test case passes.
>
> Regards,
> Mukesh
Hi Mukesh,

The reason for that change is explained in its commit message and also 
in this thread:
http://dpdk.org/ml/archives/dev/2015-May/017930.html

Hope this info helps to clarify your concern.

Sergio


[dpdk-dev] [PATCH 0/4] A proposed DPDK Crypto API and device framework

2015-08-20 Thread Declan Doherty
Co-authored-by: Des O Dea 
Co-authored-by: John Griffin 
Co-authored-by: Fiona Trahe 

This series of patches proposes a set of application burst oriented APIs for
asynchronous symmetric cryptographic  functions within DPDK. It also contains a
poll mode driver cryptographic device framework for the implementation of
crypto devices within DPDK.

In the patch set we also have included 2 reference implementations of crypto
PMDs, both are still early in development but act as an example of how we
envisage the APIs and device framework to be used. Currently both
implementations only support AES128-CBC with HMAC_SHA1/SHA256/SHA512
authentication operations. The first device is a purely software PMD based on
Intel's multi-buffer library, which utilises both AES-NI instructions and
vector operations to accelerate crypto operations and the second PMD utilises
Intel's Quick Assist Technology (on DH895xxC) to provide hardware accelerated
crypto operations.

 The proposed API set supports two functional modes of operation: 

1. A session-oriented mode. In this mode the user creates a crypto session in
advance, which defines all the immutable data required to perform a particular
crypto operation, including the cipher/hash algorithms and operations to be
performed as well as the keys to be used, etc. The session is then referenced
by the crypto operation data structure, which is specific to each mbuf. It
contains all the mutable data about the crypto operation to be performed, such
as data offsets and lengths into the mbuf's data payload for the cipher and
hash operations.

2. A session-less mode. In this mode the user is able to provision crypto
operations on an mbuf without the need to have a cached session created in
advance, but at the cost of the overhead of calculating authentication
pre-computes and performing key expansions in-line with the crypto operation.
Only the crypto operation data structure must be completed in this mode, but
all of the immutable crypto operation parameters that would normally be set
within a session are now specified within the crypto operation data structure.
Once all mutable and immutable parameters are set, the crypto operation data
structure can be attached to the specified mbuf and enqueued on a specified
crypto device for processing.
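
To make the split between immutable session data and mutable per-packet data
more concrete, here is a small illustrative sketch. All structure and field
names below are hypothetical placeholders chosen for illustration; they are not
the structures defined in these patches.

#include <stdint.h>

/* Immutable per-session data: created once and reused for many packets
 * (session-oriented mode). */
struct toy_crypto_session {
        int cipher_algo;                /* e.g. AES128-CBC */
        int auth_algo;                  /* e.g. HMAC-SHA1 */
        uint8_t cipher_key[16];
        uint8_t auth_key[64];
};

/* Mutable per-packet data: attached to each mbuf before it is enqueued on a
 * crypto device. */
struct toy_crypto_op {
        const struct toy_crypto_session *sess; /* NULL => session-less mode */
        uint32_t cipher_offset;         /* offset into the mbuf payload to cipher */
        uint32_t cipher_length;
        uint32_t auth_offset;           /* offset into the mbuf payload to hash */
        uint32_t auth_length;
};

/* In session-less mode the op would additionally carry the algorithms and key
 * material itself, at the cost of per-packet key expansion and authentication
 * pre-compute overhead. */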

The patch set contains the following features:
- Crypto device APIs and device framework
- Example implementation of a software crypto PMD based on multi-buffer library
- Example implementation of a hardware crypto PMD based on Intel QAT(DH895xxC)
- Unit and performance tests which give an example of utilising the crypto
APIs.


Current Status: The patch set has only been compiled and tested with 64-bit
gcc. There is no support for chained mbufs and, as mentioned above, the PMDs
currently only implement support for AES128-CBC/AES256-CBC/AES512-CBC
and HMAC_SHA1/SHA256/SHA512. At this stage we are looking for feedback on the
proposed APIs and the framework implementations.


Declan Doherty (3):
  cryptodev: Initial DPDK Crypto APIs and device framework release
  aesni_mb_pmd: Initial implementation of multi buffer based crypto
device
  app/test: add cryptodev unit and performance tests

John Griffin (1):
  qat_crypto_pmd: Addition of a new QAT DPDK PMD.

 app/test/Makefile  |7 +-
 app/test/test.c|   91 +-
 app/test/test.h|   34 +-
 app/test/test_cryptodev.c  | 1079 +++
 app/test/test_cryptodev_perf.c | 1438 
 app/test/test_link_bonding.c   |6 +-
 app/test/test_link_bonding_mode4.c |7 +-
 config/common_bsdapp   |   30 +-
 config/common_linuxapp |   29 +-
 doc/api/doxy-api-index.md  |1 +
 doc/api/doxy-api.conf  |1 +
 doc/guides/cryptodevs/aesni_mb.rst |   76 ++
 doc/guides/cryptodevs/index.rst|   43 +
 doc/guides/cryptodevs/qat.rst  |  155 +++
 doc/guides/index.rst   |1 +
 drivers/Makefile   |1 +
 drivers/crypto/Makefile|   38 +
 drivers/crypto/aesni_mb/Makefile   |   67 +
 drivers/crypto/aesni_mb/aesni_mb_ops.h |  206 +++
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c |  550 
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c |  346 +
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h |  224 +++
 drivers/crypto/aesni_mb/rte_pmd_aesni_version.map  |5 +
 drivers/crypto/qat/Makefile|   63 +
 .../qat/qat_adf/adf_transport_access_macros.h  |  173 +++
 drivers/crypto/qat/qat_adf/icp_qat_fw.h|  316 +
 drivers/crypto/qat/qat_adf/icp_

[dpdk-dev] [PATCH 1/4] cryptodev: Initial DPDK Crypto APIs and device framework release

2015-08-20 Thread Declan Doherty
Co-authored-by: Des O Dea 
Co-authored-by: John Griffin 
Co-authored-by: Fiona Trahe 

This patch contains the initial proposed APIs and device framework for
integrating crypto packet processing into DPDK.

features include:
 - Crypto device configuration / management APIs
 - Definitions of supported cipher algorithms and operations.
 - Definitions of supported hash/authentication algorithms and
   operations.
 - Crypto session management APIs
 - Crypto operation data structures and APIs for allocation of the crypto
   operation structure used to specify the crypto operations to
   be performed on a particular mbuf.
 - Extension of mbuf to contain crypto operation data pointer and
   extra flags.
 - Burst enqueue / dequeue APIs for processing of crypto operations.

Signed-off-by: Declan Doherty 
---
 config/common_bsdapp|   9 +-
 config/common_linuxapp  |   7 +
 doc/api/doxy-api-index.md   |   1 +
 doc/api/doxy-api.conf   |   1 +
 lib/Makefile|   1 +
 lib/librte_cryptodev/Makefile   |  60 ++
 lib/librte_cryptodev/rte_crypto.h   | 649 +++
 lib/librte_cryptodev/rte_crypto_version.map |  40 ++
 lib/librte_cryptodev/rte_cryptodev.c| 966 
 lib/librte_cryptodev/rte_cryptodev.h| 550 
 lib/librte_cryptodev/rte_cryptodev_pmd.h| 622 ++
 lib/librte_eal/common/include/rte_log.h |   1 +
 lib/librte_eal/common/include/rte_memory.h  |  14 +-
 lib/librte_mbuf/rte_mbuf.c  |   1 +
 lib/librte_mbuf/rte_mbuf.h  |  51 ++
 mk/rte.app.mk   |   1 +
 16 files changed, 2971 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_cryptodev/Makefile
 create mode 100644 lib/librte_cryptodev/rte_crypto.h
 create mode 100644 lib/librte_cryptodev/rte_crypto_version.map
 create mode 100644 lib/librte_cryptodev/rte_cryptodev.c
 create mode 100644 lib/librte_cryptodev/rte_cryptodev.h
 create mode 100644 lib/librte_cryptodev/rte_cryptodev_pmd.h

diff --git a/config/common_bsdapp b/config/common_bsdapp
index b37dcf4..ed30180 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -147,6 +147,13 @@ CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y

 #
+# Compile generic Crypto device library
+#
+CONFIG_RTE_LIBRTE_CRYPTODEV=y
+CONFIG_RTE_LIBRTE_CRYPTODEV_DEBUG=y
+CONFIG_RTE_MAX_CRYPTOPORTS=32
+
+#
 # Support NIC bypass logic
 #
 CONFIG_RTE_NIC_BYPASS=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0de43d5..12a75c6 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -145,6 +145,13 @@ CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y

 #
+# Compile generic Crypto device library
+#
+CONFIG_RTE_LIBRTE_CRYPTODEV=y
+CONFIG_RTE_LIBRTE_CRYPTODEV_DEBUG=y
+CONFIG_RTE_MAX_CRYPTODEVS=64
+
+#
 # Support NIC bypass logic
 #
 CONFIG_RTE_NIC_BYPASS=n
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 72ac3c4..bdb6130 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -39,6 +39,7 @@ There are many libraries, so their headers may be grouped by 
topics:
   [dev](@ref rte_dev.h),
   [ethdev] (@ref rte_ethdev.h),
   [ethctrl](@ref rte_eth_ctrl.h),
+  [cryptodev]  (@ref rte_cryptodev.h),
   [devargs](@ref rte_devargs.h),
   [bond]   (@ref rte_eth_bond.h),
   [vhost]  (@ref rte_virtio_net.h),
diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf
index cfb4627..7244b8f 100644
--- a/doc/api/doxy-api.conf
+++ b/doc/api/doxy-api.conf
@@ -37,6 +37,7 @@ INPUT   = doc/api/doxy-api-index.md \
   lib/librte_cfgfile \
   lib/librte_cmdline \
   lib/librte_compat \
+  lib/librte_cryptodev \
   lib/librte_distributor \
   lib/librte_ether \
   lib/librte_hash \
diff --git a/lib/Makefile b/lib/Makefile
index 2055539..9e5f484 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -41,6 +41,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += librte_timer
 DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
 DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
+DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
diff --git a/lib/librte_cryptodev/Makefile b/

[dpdk-dev] [PATCH 2/4] qat_crypto_pmd: Addition of a new QAT DPDK PMD.

2015-08-20 Thread Declan Doherty
From: John Griffin 

Co-authored-by: Des O Dea 
Co-authored-by: Fiona Trahe 

This patch adds a PMD for the Intel Quick Assist Technology DH895xxC
hardware accelerator.
This PMD will adhere to the cryptodev API (contained in a previous patch).
This patch depends on a QAT PF driver which may be downloaded from
01.org (please see the file qat_pf_driver_install.txt contained in
this patch).

This is a limited patchset which has support for a chain of cipher and
hash; the following algorithms are supported:
Cipher algorithms:
 -   RTE_CRYPTO_SYM_CIPHER_AES128_CBC
 -   RTE_CRYPTO_SYM_CIPHER_AES256_CBC
 -   RTE_CRYPTO_SYM_CIPHER_AES512_CBC
Hash algorithms:
 -   RTE_CRYPTO_SYM_HASH_SHA1_HMAC
 -   RTE_CRYPTO_SYM_HASH_SHA256_HMAC
 -   RTE_CRYPTO_SYM_HASH_SHA512_HMAC

Some limitations of this patchset, which shall be addressed in a
subsequent release:
 -   Chained mbufs are not supported.
 -   Hash only is not supported.
 -   Cipher only is not supported.
 -   Only in-place is currently supported (destination address is the
 same as source address).
 -   Only supports session-oriented API implementation (session-less
 APIs are not supported).
 -   Not performance tuned.

Signed-off-by: Declan Doherty 
---
 config/common_bsdapp   |  13 +
 config/common_linuxapp |  15 +-
 doc/guides/cryptodevs/index.rst|  42 ++
 doc/guides/cryptodevs/qat.rst  | 155 +++
 doc/guides/index.rst   |   1 +
 drivers/Makefile   |   1 +
 drivers/crypto/Makefile|  38 ++
 drivers/crypto/qat/Makefile|  63 +++
 .../qat/qat_adf/adf_transport_access_macros.h  | 173 
 drivers/crypto/qat/qat_adf/icp_qat_fw.h| 316 ++
 drivers/crypto/qat/qat_adf/icp_qat_fw_la.h | 404 ++
 drivers/crypto/qat/qat_adf/icp_qat_hw.h| 305 ++
 drivers/crypto/qat/qat_adf/qat_algs.h  | 124 ++
 drivers/crypto/qat/qat_adf/qat_algs_build_desc.c   | 462 
 drivers/crypto/qat/qat_crypto.c| 469 +
 drivers/crypto/qat/qat_crypto.h|  99 +
 drivers/crypto/qat/qat_logs.h  |  78 
 drivers/crypto/qat/qat_qp.c| 372 
 drivers/crypto/qat/rte_pmd_qat_version.map |   5 +
 drivers/crypto/qat/rte_qat_cryptodev.c | 128 ++
 mk/rte.app.mk  |   3 +
 21 files changed, 3265 insertions(+), 1 deletion(-)
 create mode 100644 doc/guides/cryptodevs/index.rst
 create mode 100644 doc/guides/cryptodevs/qat.rst
 create mode 100644 drivers/crypto/Makefile
 create mode 100644 drivers/crypto/qat/Makefile
 create mode 100644 drivers/crypto/qat/qat_adf/adf_transport_access_macros.h
 create mode 100644 drivers/crypto/qat/qat_adf/icp_qat_fw.h
 create mode 100644 drivers/crypto/qat/qat_adf/icp_qat_fw_la.h
 create mode 100644 drivers/crypto/qat/qat_adf/icp_qat_hw.h
 create mode 100644 drivers/crypto/qat/qat_adf/qat_algs.h
 create mode 100644 drivers/crypto/qat/qat_adf/qat_algs_build_desc.c
 create mode 100644 drivers/crypto/qat/qat_crypto.c
 create mode 100644 drivers/crypto/qat/qat_crypto.h
 create mode 100644 drivers/crypto/qat/qat_logs.h
 create mode 100644 drivers/crypto/qat/qat_qp.c
 create mode 100644 drivers/crypto/qat/rte_pmd_qat_version.map
 create mode 100644 drivers/crypto/qat/rte_qat_cryptodev.c

diff --git a/config/common_bsdapp b/config/common_bsdapp
index ed30180..8fcc004 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -154,6 +154,19 @@ CONFIG_RTE_LIBRTE_CRYPTODEV_DEBUG=y
 CONFIG_RTE_MAX_CRYPTOPORTS=32

 #
+# Compile PMD for QuickAssist based devices
+#
+CONFIG_RTE_LIBRTE_PMD_QAT=y
+CONFIG_RTE_LIBRTE_QAT_DEBUG_INIT=n
+CONFIG_RTE_LIBRTE_QAT_DEBUG_TX=y
+CONFIG_RTE_LIBRTE_QAT_DEBUG_RX=y
+CONFIG_RTE_LIBRTE_QAT_DEBUG_DRIVER=y
+#
+# Number of sessions to create in the session memory pool
+# on a single QuickAssist device.
+#
+CONFIG_RTE_MAX_QAT_SESSIONS=200
+
 # Support NIC bypass logic
 #
 CONFIG_RTE_NIC_BYPASS=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 12a75c6..7199c95 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -152,6 +152,19 @@ CONFIG_RTE_LIBRTE_CRYPTODEV_DEBUG=y
 CONFIG_RTE_MAX_CRYPTODEVS=64

 #
+# Compile PMD for QuickAssist based devices
+#
+CONFIG_RTE_LIBRTE_PMD_QAT=y
+CONFIG_RTE_LIBRTE_PMD_QAT_DEBUG_INIT=n
+CONFIG_RTE_LIBRTE_PMD_QAT_DEBUG_TX=y
+CONFIG_RTE_LIBRTE_PMD_QAT_DEBUG_RX=y
+CONFIG_RTE_LIBRTE_PMD_QAT_DEBUG_DRIVER=y
+#
+# Number of sessions to create

[dpdk-dev] [PATCH 3/4] aesni_mb_pmd: Initial implementation of multi buffer based crypto device

2015-08-20 Thread Declan Doherty
This patch provides the initial implementation of the AES-NI multi-buffer
based crypto poll mode driver using DPDK's new cryptodev framework.
This PMD is dependent on Intel's multi-buffer library; see the white paper
"Fast Multi-buffer IPsec Implementations on Intel® Architecture
Processors" (ref 1) for details on the library's design, and ref 2 to
download the library itself. This initial implementation is limited to
supporting the chained operations of "hash then cipher" or "cipher then
hash" for the following cipher and hash algorithms:

 - RTE_CRYPTO_SYM_CIPHER_AES128_CBC
 - RTE_CRYPTO_SYM_CIPHER_AES256_CBC
 - RTE_CRYPTO_SYM_CIPHER_AES512_CBC
 - RTE_CRYPTO_SYM_HASH_SHA1_HMAC
 - RTE_CRYPTO_SYM_HASH_SHA256_HMAC
 - RTE_CRYPTO_SYM_HASH_SHA512_HMAC

Important Note:
Because the multi-buffer library is designed for accelerating IPsec
crypto operations, the digests generated for the HMAC functions are
truncated to the lengths specified by the IPsec RFCs, e.g. RFC 2404 for
HMAC-SHA-1 with IPsec specifies that the digest is truncated
from 20 to 12 bytes.
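
As a concrete illustration of the truncation described above (an editor's
sketch, not code from this patch): the library produces the full 20-byte
HMAC-SHA-1 digest and only the first 12 bytes (96 bits) are carried, per
RFC 2404.

#include <stdint.h>
#include <string.h>

#define SHA1_DIGEST_LEN    20  /* full HMAC-SHA-1 output, in bytes */
#define SHA1_96_TRUNC_LEN  12  /* truncated length used by IPsec */

/* Copy only the first 12 bytes of a full 20-byte HMAC-SHA-1 digest. */
static void
truncate_sha1_hmac_digest(uint8_t *dst, const uint8_t full[SHA1_DIGEST_LEN])
{
        memcpy(dst, full, SHA1_96_TRUNC_LEN);
}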

Build instructions:
To build DPDK with the AESNI_MB_PMD the user is required to download
(ref 2) and compile the multi-buffer library on their system before
building DPDK. The environment variable AESNI_MULTI_BUFFER_LIB_PATH
must be exported with the path where you extracted and built the multi
buffer library, and finally set CONFIG_RTE_LIBRTE_PMD_AESNI_MB=y in
config/common_linuxapp.

Current status: This is a work in progress, which has not been
performance tuned. The software has only been built and tested on
Fedora 20 64-bit using gcc. It doesn't support crypto operations across
chained mbufs, or cipher-only or hash-only operations.

ref 1:
https://www-ssl.intel.com/content/www/us/en/intelligent-systems/intel-technology/fast-multi-buffer-ipsec-implementations-ia-processors-p

ref 2: https://downloadcenter.intel.com/download/22972

Signed-off-by: Declan Doherty 
---
 config/common_bsdapp   |   8 +
 config/common_linuxapp |   7 +
 doc/guides/cryptodevs/aesni_mb.rst |  76 +++
 doc/guides/cryptodevs/index.rst|   1 +
 drivers/crypto/Makefile|   2 +-
 drivers/crypto/aesni_mb/Makefile   |  67 +++
 drivers/crypto/aesni_mb/aesni_mb_ops.h | 206 
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 550 +
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c | 346 +
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h | 224 +
 drivers/crypto/aesni_mb/rte_pmd_aesni_version.map  |   5 +
 mk/rte.app.mk  |   4 +
 12 files changed, 1495 insertions(+), 1 deletion(-)
 create mode 100644 doc/guides/cryptodevs/aesni_mb.rst
 create mode 100644 drivers/crypto/aesni_mb/Makefile
 create mode 100644 drivers/crypto/aesni_mb/aesni_mb_ops.h
 create mode 100644 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
 create mode 100644 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c
 create mode 100644 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h
 create mode 100644 drivers/crypto/aesni_mb/rte_pmd_aesni_version.map

diff --git a/config/common_bsdapp b/config/common_bsdapp
index 8fcc004..9c5e1e0 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -167,6 +167,14 @@ CONFIG_RTE_LIBRTE_QAT_DEBUG_DRIVER=y
 #
 CONFIG_RTE_MAX_QAT_SESSIONS=200

+
+#
+# Compile PMD for AESNI backed device
+#
+CONFIG_RTE_LIBRTE_PMD_AESNI_MB=y
+CONFIG_RTE_LIBRTE_AESNI_MB_DEBUG=n
+
+#
 # Support NIC bypass logic
 #
 CONFIG_RTE_NIC_BYPASS=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 7199c95..8e9e8fd 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -165,6 +165,13 @@ CONFIG_RTE_LIBRTE_PMD_QAT_DEBUG_DRIVER=y
 #
 CONFIG_RTE_LIBRTE_PMD_QAT_MAX_SESSIONS=4096

+# Compile PMD for AESNI backed device
+#
+CONFIG_RTE_LIBRTE_PMD_AESNI_MB=y
+CONFIG_RTE_LIBRTE_PMD_AESNI_MB_DEBUG=n
+CONFIG_RTE_LIBRTE_PMD_AESNI_MB_MAX_SESSIONS=2048
+
+#
 # Support NIC bypass logic
 #
 CONFIG_RTE_NIC_BYPASS=n
diff --git a/doc/guides/cryptodevs/aesni_mb.rst 
b/doc/guides/cryptodevs/aesni_mb.rst
new file mode 100644
index 000..4d15b6b
--- /dev/null
+++ b/doc/guides/cryptodevs/aesni_mb.rst
@@ -0,0 +1,76 @@
+..  BSD LICENSE
+Copyright(c) 2015 Intel Corporation. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+* Neither the name of Intel Corporation nor the names of it

[dpdk-dev] [PATCH 4/4] app/test: add cryptodev unit and performance tests

2015-08-20 Thread Declan Doherty
Co-authored-by: Des O Dea 
Co-authored-by: John Griffin 
Co-authored-by: Fiona Trahe 

unit tests are run by using cryptodev_qat_autotest or
cryptodev_aesni_autotest from the test apps interactive console.

performance tests are run by using the cryptodev_qat_perftest or
cryptodev_aesni_mb_perftest command from the test apps interactive
console.

If you wish to run the tests on a QAT device there must be one
bound to the igb_uio kernel driver.

Signed-off-by: Declan Doherty 
---
 app/test/Makefile  |7 +-
 app/test/test.c|   91 ++-
 app/test/test.h|   34 +-
 app/test/test_cryptodev.c  | 1079 +++
 app/test/test_cryptodev_perf.c | 1438 
 app/test/test_link_bonding.c   |6 +-
 app/test/test_link_bonding_mode4.c |7 +-
 7 files changed, 2616 insertions(+), 46 deletions(-)
 create mode 100644 app/test/test_cryptodev.c
 create mode 100644 app/test/test_cryptodev_perf.c

diff --git a/app/test/Makefile b/app/test/Makefile
index e7f148f..0812487 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -140,11 +140,14 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += test_link_bonding.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += test_link_bonding_mode4.c
 endif

+SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev_perf.c
+SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev.c
+
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_RING) += test_pmd_ring.c
 SRCS-$(CONFIG_RTE_LIBRTE_KVARGS) += test_kvargs.c

-CFLAGS += -O3
-CFLAGS += $(WERROR_FLAGS)
+#CFLAGS += -O3
+CFLAGS += -g -O0 $(WERROR_FLAGS)

 # Disable warnings of deprecated-declarations in test_kni.c
 ifeq ($(CC), icc)
diff --git a/app/test/test.c b/app/test/test.c
index e8992f4..19cfcb1 100644
--- a/app/test/test.c
+++ b/app/test/test.c
@@ -159,51 +159,82 @@ main(int argc, char **argv)
 int
 unit_test_suite_runner(struct unit_test_suite *suite)
 {
-   int retval, i = 0;
+   int test_success;
+   unsigned total = 0, executed = 0, skipped = 0, succeeded = 0, failed = 
0;

if (suite->suite_name)
-   printf("Test Suite : %s\n", suite->suite_name);
+   printf(" + 
--- +\n");
+   printf(" + Test Suite : %s\n", suite->suite_name);

if (suite->setup)
if (suite->setup() != 0)
-   return -1;
-
-   while (suite->unit_test_cases[i].testcase) {
-   /* Run test case setup */
-   if (suite->unit_test_cases[i].setup) {
-   retval = suite->unit_test_cases[i].setup();
-   if (retval != 0)
-   return retval;
-   }
+   goto suite_summary;

-   /* Run test case */
-   if (suite->unit_test_cases[i].testcase() == 0) {
-   printf("TestCase %2d: %s\n", i,
-   suite->unit_test_cases[i].success_msg ?
-   suite->unit_test_cases[i].success_msg :
-   "passed");
+   printf(" + --- 
+\n");
+
+   while (suite->unit_test_cases[total].testcase) {
+   if (!suite->unit_test_cases[total].enabled) {
+   skipped++;
+   total++;
+   continue;
+   } else {
+   executed++;
}
-   else {
-   printf("TestCase %2d: %s\n", i, 
suite->unit_test_cases[i].fail_msg ?
-   suite->unit_test_cases[i].fail_msg :
-   "failed");
-   return -1;
+
+   /* run test case setup */
+   if (suite->unit_test_cases[total].setup)
+   test_success = suite->unit_test_cases[total].setup();
+   else
+   test_success = TEST_SUCCESS;
+
+   if (test_success == TEST_SUCCESS) {
+   /* run the test case */
+   test_success = suite->unit_test_cases[total].testcase();
+   if (test_success == TEST_SUCCESS)
+   succeeded++;
+   else
+   failed++;
+   } else {
+   failed++;
}

-   /* Run test case teardown */
-   if (suite->unit_test_cases[i].teardown) {
-   retval = suite->unit_test_cases[i].teardown();
-   if (retval != 0)
-   return retval;
+   /* run the test case teardown */
+   if (suite->unit_test_cases[total].teardown) {
+   suite->unit_test_cases[total].teardown();
}

-   i++;
+  

[dpdk-dev] [PATCH v3] ixgbe_pmd: enforce RS bit on every EOP descriptor for devices newer than 82598

2015-08-20 Thread Vlad Zolotarov
According to the 82599 and x540 HW specifications, the RS bit *must* be
set in the last descriptor of *every* packet.

Before this patch there were 3 types of Tx callbacks that set the RS
bit every tx_rs_thresh descriptors. This patch introduces a set of new
callbacks, one for each of those types, that set the RS bit in every
EOP descriptor.

ixgbe_set_tx_function() will set the appropriate Tx callback according
to the device family.

This patch fixes the Tx hang we were constantly hitting with a
seastar-based application on x540 NIC.

Signed-off-by: Vlad Zolotarov 
---
New in v3:
   - Enforce the RS bit setting instead of enforcing tx_rs_thresh to be 1.
---
 drivers/net/ixgbe/ixgbe_ethdev.c   |  14 +++-
 drivers/net/ixgbe/ixgbe_ethdev.h   |   4 ++
 drivers/net/ixgbe/ixgbe_rxtx.c | 140 -
 drivers/net/ixgbe/ixgbe_rxtx.h |   2 +
 drivers/net/ixgbe/ixgbe_rxtx_vec.c |  27 +--
 5 files changed, 148 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index b8ee1e9..355882c 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -866,12 +866,17 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
uint32_t ctrl_ext;
uint16_t csum;
int diag, i;
+   bool rs_deferring_allowed = (hw->mac.type <= ixgbe_mac_82598EB);

PMD_INIT_FUNC_TRACE();

eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
-   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+
+   if (rs_deferring_allowed)
+   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   else
+   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts_always_rs;

/*
 * For secondary processes, we don't initialise any further as primary
@@ -1147,12 +1152,17 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
struct ixgbe_hwstrip *hwstrip =
IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(eth_dev->data->dev_private);
struct ether_addr *perm_addr = (struct ether_addr *) hw->mac.perm_addr;
+   bool rs_deferring_allowed = (hw->mac.type <= ixgbe_mac_82598EB);

PMD_INIT_FUNC_TRACE();

eth_dev->dev_ops = &ixgbevf_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
-   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+
+   if (rs_deferring_allowed)
+   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   else
+   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts_always_rs;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index c3d4f4f..390356d 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -367,9 +367,13 @@ uint16_t ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue,

 uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t ixgbe_xmit_pkts_always_rs(
+   void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t ixgbe_xmit_pkts_simple_always_rs(
+   void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 91023b9..9ddaaa5 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -164,11 +164,16 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)

 /* Populate 4 descriptors with data from 4 mbufs */
 static inline void
-tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
+tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts,
+bool always_rs)
 {
uint64_t buf_dma_addr;
uint32_t pkt_len;
int i;
+   uint32_t flags = DCMD_DTYP_FLAGS;
+
+   if (always_rs)
+   flags |= IXGBE_ADVTXD_DCMD_RS;

for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
@@ -178,7 +183,7 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf 
**pkts)
txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);

txdp->read.cmd_type_len =
-   rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
+   rte_cpu_to_le_32(flags | pkt_len);

txdp->read.olinfo_status =
rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
@@ -189,10 +194,15 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct 
rte_mbuf **pkts)

 /* Populate 1 descriptor with data from 1 mbuf */
 static inline void
-tx1(volatile union ixgbe_adv_tx_desc *txdp, struct r

[dpdk-dev] [PATCH v4] ixgbe_pmd: enforce RS bit on every EOP descriptor for devices newer than 82598

2015-08-20 Thread Vlad Zolotarov
According to the 82599 and x540 HW specifications, the RS bit *must* be
set in the last descriptor of *every* packet.

Before this patch there were 3 types of Tx callbacks that set the RS
bit every tx_rs_thresh descriptors. This patch introduces a set of new
callbacks, one for each of those types, that set the RS bit in every
EOP descriptor.

ixgbe_set_tx_function() will set the appropriate Tx callback according
to the device family.

This patch fixes the Tx hang we were constantly hitting with a
seastar-based application on x540 NIC.

Signed-off-by: Vlad Zolotarov 
---
New in v4:
   - Styling (white spaces) fixes.

New in v3:
   - Enforce the RS bit setting instead of enforcing tx_rs_thresh to be 1.
---
 drivers/net/ixgbe/ixgbe_ethdev.c   |  14 +++-
 drivers/net/ixgbe/ixgbe_ethdev.h   |   4 ++
 drivers/net/ixgbe/ixgbe_rxtx.c | 139 -
 drivers/net/ixgbe/ixgbe_rxtx.h |   2 +
 drivers/net/ixgbe/ixgbe_rxtx_vec.c |  29 ++--
 5 files changed, 149 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index b8ee1e9..355882c 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -866,12 +866,17 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
uint32_t ctrl_ext;
uint16_t csum;
int diag, i;
+   bool rs_deferring_allowed = (hw->mac.type <= ixgbe_mac_82598EB);

PMD_INIT_FUNC_TRACE();

eth_dev->dev_ops = &ixgbe_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
-   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+
+   if (rs_deferring_allowed)
+   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   else
+   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts_always_rs;

/*
 * For secondary processes, we don't initialise any further as primary
@@ -1147,12 +1152,17 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
struct ixgbe_hwstrip *hwstrip =
IXGBE_DEV_PRIVATE_TO_HWSTRIP_BITMAP(eth_dev->data->dev_private);
struct ether_addr *perm_addr = (struct ether_addr *) hw->mac.perm_addr;
+   bool rs_deferring_allowed = (hw->mac.type <= ixgbe_mac_82598EB);

PMD_INIT_FUNC_TRACE();

eth_dev->dev_ops = &ixgbevf_eth_dev_ops;
eth_dev->rx_pkt_burst = &ixgbe_recv_pkts;
-   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+
+   if (rs_deferring_allowed)
+   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;
+   else
+   eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts_always_rs;

/* for secondary processes, we don't initialise any further as primary
 * has already done this work. Only check we don't need a different
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index c3d4f4f..390356d 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -367,9 +367,13 @@ uint16_t ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue,

 uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t ixgbe_xmit_pkts_always_rs(
+   void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

 uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t ixgbe_xmit_pkts_simple_always_rs(
+   void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

 int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
  struct rte_eth_rss_conf *rss_conf);
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 91023b9..044f72c 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -164,11 +164,16 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)

 /* Populate 4 descriptors with data from 4 mbufs */
 static inline void
-tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
+tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts,
+bool always_rs)
 {
uint64_t buf_dma_addr;
uint32_t pkt_len;
int i;
+   uint32_t flags = DCMD_DTYP_FLAGS;
+
+   if (always_rs)
+   flags |= IXGBE_ADVTXD_DCMD_RS;

for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
@@ -178,7 +183,7 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf 
**pkts)
txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);

txdp->read.cmd_type_len =
-   rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
+   rte_cpu_to_le_32(flags | pkt_len);

txdp->read.olinfo_status =
rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
@@ -189,10 +194,15 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct 
rte_mbuf **pkts)

 /* Populate 1 descriptor with data from 1 mbuf */
 static inline void
-tx1(

[dpdk-dev] ISSUE IN rte_bitmap

2015-08-20 Thread Ariel Rodriguez
Hi, I am facing an issue regarding the maximum number of bits that
rte_bitmap allows.

Example :

bmp_mem_size = rte_bitmap_get_memory_footprint(4000);

OK

(bmp_mem_size==2101312 (size in bytes aligned). Around 2MB)

but if i do this:

bmp_mem_size = rte_bitmap_get_memory_footprint(4294967292); (MAX uint32_t).

(bmp_mem_size == 64).

Obviously this is wrong. Besides the validation problem inside
rte_bitmap_get_memory_footprint, the main problem is that the
rte_bitmap API uses word-sized (4-byte) integers for the calculations of
the maximum bitmap size, which of course makes them wrap around.
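
A minimal standalone sketch of the wrap-around (not the actual DPDK
code; the cache-line rounding below is only an assumption about where
the 32-bit math overflows, but it reproduces the "tiny footprint for a
huge bitmap" symptom):

#include <stdint.h>
#include <stdio.h>

#define CL_BIT_SIZE 512u   /* bits per 64-byte cache line (assumption) */

int main(void)
{
        uint32_t n_bits = 4294967292u;   /* the value from the example above */

        /* Rounding n_bits up to a whole number of cache lines in 32-bit
         * arithmetic overflows and collapses to 0 ... */
        uint32_t rounded32 = (n_bits + CL_BIT_SIZE - 1) & ~(CL_BIT_SIZE - 1);
        /* ... while the same computation in 64 bits gives the expected size. */
        uint64_t rounded64 = ((uint64_t)n_bits + CL_BIT_SIZE - 1) &
                             ~(uint64_t)(CL_BIT_SIZE - 1);

        printf("32-bit rounding: %u bits\n", rounded32);   /* prints 0 */
        printf("64-bit rounding: %llu bits\n", (unsigned long long)rounded64);
        return 0;
}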

In the rte_sched code there is a similar issue. In theory (per the
documentation) the QoS scheduler can handle a configuration like this:

struct rte_sched_port_hierarchy {
uint32_t queue:2;/**< Queue ID (0 .. 3) */
uint32_t traffic_class:2; /**< Traffic class ID (0 .. 3)*/
uint32_t pipe:20; /**< Pipe ID */  (Compile time configuration) 1M
subscribers
uint32_t subport:6; /**< Subport ID */ 32 subport
uint32_t color:2; /**< Color */
};


If one tries to create a struct rte_sched_port* with that configuration
(4095 max pipe profiles), the API fails: when rte_sched_port_config()
tries to initialize the memory for that configuration it first calls
rte_sched_port_get_array_base(), and that function generates an invalid
total size, because the total memory needed is beyond 4 GB, bigger than
the unsigned int maximum. (Again the code is using uint32_t for the
size calculation.)

I know the QoS scheduler use case is maybe not intended for such a huge
configuration, but looking at the code there is no limitation except for
the 4-byte bit fields of struct rte_sched_port_hierarchy.
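
The same class of problem can be shown with a plain 32-bit
multiplication; the per-pipe byte count below is made up, only the
overflow matters:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t n_pipes = 1u << 20;      /* 1M pipes, as the bit-field allows */
        uint32_t per_pipe_bytes = 4608;   /* hypothetical per-pipe footprint */

        /* 32-bit product wraps around; the 64-bit product is ~4.8 GB. */
        uint32_t total32 = n_pipes * per_pipe_bytes;
        uint64_t total64 = (uint64_t)n_pipes * per_pipe_bytes;

        printf("32-bit total: %u bytes\n", total32);
        printf("64-bit total: %llu bytes\n", (unsigned long long)total64);
        return 0;
}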


[dpdk-dev] DPDK 2.1.0 build error: inlining failed in call to always_inline

2015-08-20 Thread Keith E. Fleming
make config T=x86_64-native-linuxapp-gcc
sed -ri 's,(PMD_PCAP=).*,\1y,' build/.config
make
[root at neutron dpdk-2.1.0]# make 2>&1 | more
== Build lib
== Build lib/librte_compat
== Build lib/librte_eal
== Build lib/librte_eal/common
== Build lib/librte_eal/linuxapp
== Build lib/librte_eal/linuxapp/igb_uio
(cat /dev/null;   echo kernel//root/dpdk-2.1.0/build/build/lib/librte_eal/linuxapp/igb_uio/igb_uio.ko;) > /root/dpdk-2.1.0/build/build/lib/librte_eal/linuxapp/igb_uio/modules.order
  Building modules, stage 2.
  MODPOST 1 modules
== Build lib/librte_eal/linuxapp/eal
  CC eal_common_options.o
In file included from 
/usr/lib/gcc/x86_64-redhat-linux/4.9.2/include/x86intrin.h:37:0,
 from /root/dpdk-2.1.0/build/include/rte_vect.h:67,
 from /root/dpdk-2.1.0/build/include/rte_memcpy.h:46,
 from 
/root/dpdk-2.1.0/lib/librte_eal/common/eal_common_options.c:48:
/root/dpdk-2.1.0/build/include/rte_memcpy.h: In function 'rte_memcpy':
/usr/lib/gcc/x86_64-redhat-linux/4.9.2/include/tmmintrin.h:185:1: error: 
inlining failed in call to always_inline '_mm_alignr_epi8': 
target specific option mismatch
 _mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
 ^
In file included from 
/root/dpdk-2.1.0/lib/librte_eal/common/eal_common_options.c:48:0:
/root/dpdk-2.1.0/build/include/rte_memcpy.h:425:9: error: called from here

I can't even guess at what the problem is here. It's a total trainwreck. I 
assume this has compiled successfully for someone somewhere? Thanks


[dpdk-dev] Issue observed with execution of Reorder test app

2015-08-20 Thread Mukesh Dua
Hi, thanks for sharing the details.
Based on my understanding, I have made the following changes. The test
is now passing with them.

diff -rupN a/app/test/test_reorder.c b/app/test/test_reorder.c
--- a/app/test/test_reorder.c   2015-08-20 13:59:55.0 -0400
+++ b/app/test/test_reorder.c   2015-08-21 00:19:10.305447948 -0400
@@ -181,10 +181,10 @@ test_reorder_insert(void)
ret = rte_mempool_get_bulk(p, (void *)bufs, num_bufs);
TEST_ASSERT_SUCCESS(ret, "Error getting mbuf from pool");

-   /* late packet */
+   /* late packet - registers the min_seqn */
bufs[0]->seqn = 3 * size;
ret = rte_reorder_insert(b, bufs[0]);
-   if (!((ret == -1) && (rte_errno == ERANGE))) {
+   if (ret != 0) {
printf("%s:%d: No error inserting late packet with seqn:"
" 3 * size\n", __func__, __LINE__);
ret = -1;
@@ -192,7 +192,7 @@ test_reorder_insert(void)
}

for (i = 0; i < num_bufs; i++)
-   bufs[i]->seqn = i;
+   bufs[i]->seqn = bufs[0]->seqn + i;

/* This should fill up order buffer:
 * reorder_seq = 0
@@ -223,7 +223,7 @@ test_reorder_insert(void)
}

/* early packet from current sequence window - full ready buffer */
-   bufs[5]->seqn = 2 * size;
+   bufs[5]->seqn = 5 * size;
ret = rte_reorder_insert(b, bufs[5]);
if (!((ret == -1) && (rte_errno == ENOSPC))) {
printf("%s:%d: No error inserting early packet with full ready
buffer\n",
@@ -276,29 +276,30 @@ test_reorder_drain(void)
/* Insert packet with seqn 1:
 * reorder_seq = 0
 * RB[] = {NULL, NULL, NULL, NULL}
-* OB[] = {NULL, 1, NULL, NULL}
+* OB[] = {1, NULL, NULL, NULL}


Regards,
Mukesh

On Thu, Aug 20, 2015 at 5:35 PM, Gonzalez Monroy, Sergio <
sergio.gonzalez.monroy at intel.com> wrote:

> On 20/08/2015 12:38, Mukesh Dua wrote:
>
>> I see issue with reorder test app failing on x86 environment due to
>> changes
>> made between release 2.0.0 and 2.1.0:
>>
>> App reorder_test (app/test/test_reorder.c)
>> 
>> Function failing: test_reorder_insert
>>
>> There had been some changes with respect to addition of parameter
>> is_initialized to the structure rte_reorder_buffer. In parallel the
>> changes
>> were made to initialize some of the parameters in function
>> rte_reorder_insert
>>
>> rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf)
>> {
>>  uint32_t offset, position;
>>  struct cir_buffer *order_buf = &b->order_buf;
>>
>> *if (!b->is_initialized) {*
>> *b->min_seqn = mbuf->seqn;*
>>  *b->is_initialized = 1;*
>> *}*
>>
>> => I don't see any reason to set b->min_seqn to mbuf->seqn and if that has
>> to be done, the conditional checks should have been modified in function
>> test_reorder_insert soon after a call to rte_reorder_insert. Additionally,
>> the next seqn number being populated should have been changed in function
>> test_reorder_insert:
>>
>>  ret = rte_reorder_insert(b, bufs[0]);
>> *if (!((ret == -1) && (rte_errno == ERANGE))) {*
>> *printf("%s:%d: No error inserting late packet with seqn:"*
>> *" 3 * size\n", __func__, __LINE__);*
>> *ret = -1;*
>> *goto exit;*
>> *}*
>>
>>  for (i = 0; i < num_bufs; i++)
>>  bufs[i]->seqn = i;
>>
>> On the other hand, changing the code in function rte_reorder_insert:
>> rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf)
>> {
>>  uint32_t offset, position;
>>  struct cir_buffer *order_buf = &b->order_buf;
>>
>>  if (!b->is_initialized) {
>> *b->min_seqn = 0;  //Removed initialization from mbuf->seqn*
>>  b->is_initialized = 1;
>>  }
>> fixes the issues and the test case passes.
>>
>> Regards,
>> Mukesh
>>
> Hi Mukesh,
>
> The reason for that change is explained in its commit message and also in
> this thread:
> http://dpdk.org/ml/archives/dev/2015-May/017930.html
>
> Hope this info helps to clarify your concern.
>
> Sergio
>


[dpdk-dev] flow_director_filter error!!

2015-08-20 Thread Navneet Rao
Thanks John.

I am trying to setup/use the flow-director-filter on the i540.

-- When I try to set up the flow-director-filter as per the example, I am
getting "bad arguments"!!!
 So I decided to see if the flush command would work.


In the interim --- I am using ethertype filter to accomplish the following.
What I am trying to do is this --
Use 2 different i540 cards
Use the igb_uio driver.
Use the testpmd app.
Setup 5 different MAC-ADDRESSes on each port. (using the set mac_addr command)
Setup 5 different RxQs and TxQs on each port.
And then use the testpmd app to generate traffic..

I am assuming that the testpmd app will now send and receive traffic using the
5 different MAC addresses.
On each port's receive side I now want to classify on the MAC address and steer
the traffic to different queues.

Is there an example/reference on how to achieve this?

Next, I would want to do "classify" on "flexbytes" and send/steer the traffic 
to different queues using flow-director-filter.

Thanks
-Navneet




-Original Message-
From: Mcnamara, John [mailto:john.mcnam...@intel.com] 
Sent: Wednesday, August 19, 2015 3:39 PM
To: Navneet Rao; dev at dpdk.org
Subject: RE: [dpdk-dev] flow_director_filter error!!

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Navneet Rao
> Sent: Tuesday, August 18, 2015 4:01 PM
> To:  HYPERLINK "mailto:dev at dpdk.org" dev at dpdk.org
> Subject: [dpdk-dev] flow_director_filter error!!
> 
> After I start the testpmd app, I am flusing the flow_director_filter 
> settings and get the following error -
> 
> 
> 
> testpmd> flush_flow_director 0
> 
> PMD: ixgbe_fdir_flush(): Failed to re-initialize FD table.
> 
> flow director table flushing error: (Too many open files in system)

Hi,

Are you setting a flow director filter before flushing? If so, could you give 
an example.

John.
-- 



[dpdk-dev] [PATCH 1/4] cryptodev: Initial DPDK Crypto APIs and device framework release

2015-08-20 Thread Neil Horman
On Thu, Aug 20, 2015 at 03:07:20PM +0100, Declan Doherty wrote:
> Co-authored-by: Des O Dea 
> Co-authored-by: John Griffin 
> Co-authored-by: Fiona Trahe 
> 
> This patch contains the initial proposed APIs and device framework for
> integrating crypto packet processing into DPDK.
> 
> features include:
>  - Crypto device configuration / management APIs
>  - Definitions of supported cipher algorithms and operations.
>  - Definitions of supported hash/authentication algorithms and
>operations.
>  - Crypto session management APIs
>  - Crypto operation data structures and APIs allocation of crypto
>operation structure used to specify the crypto operations to
>be performed  on a particular mbuf.
>  - Extension of mbuf to contain crypto operation data pointer and
>extra flags.
>  - Burst enqueue / dequeue APIs for processing of crypto operations.
> 
> Signed-off-by: Declan Doherty 

Hey, I only had a quick read so some of this might be off base, but a few
comments inline.

>
> index 000..b776609
> --- /dev/null
> +++ b/lib/librte_cryptodev/rte_crypto.h
> @@ -0,0 +1,649 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2015 Intel Corporation. All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + *   notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + *   notice, this list of conditions and the following disclaimer in
> + *   the documentation and/or other materials provided with the
> + *   distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + *   contributors may be used to endorse or promote products derived
> + *   from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _RTE_CRYPTO_H_
> +#define _RTE_CRYPTO_H_
> +
> +/**
> + * @file rte_crypto.h
> + *
> + * RTE Cryptographic Definitions
> + *
> + * Defines symmetric cipher and authentication algorithms and modes, as well
> + * as supported symmetric crypto operation combinations.
> + */
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include 
> +#include 
> +#include 
> +
> +/**
> + * This enumeration lists different types of crypto operations supported by 
> rte
> + * crypto devices. The operation type is defined during session registration 
> and
> + * cannot be changed for a session once it has been setup, or if using a
> + * session-less crypto operation it is defined within the crypto operation
> + * op_params.
> + */
> +enum rte_crypto_operation_chain {
> + RTE_CRYPTO_SYM_OP_CIPHER_ONLY,
> + /**< Cipher only operation on the data */
> + RTE_CRYPTO_SYM_OP_HASH_ONLY,
> + /**< Hash only operation on the data */
> + RTE_CRYPTO_SYM_OPCHAIN_HASH_CIPHER,
> + /**<
> +  * Chain a hash followed by any cipher operation.
> +  *
> +  * If it is required that the result of the hash (i.e. the digest)
> +  * is going to be included in the data to be ciphered, then:
> +  *
> +  * - The digest MUST be placed in the destination buffer at the
> +  *   location corresponding to the end of the data region to be hashed
> +  *   (hash_start_offset + message length to hash),  i.e. there must be
> +  *   no gaps between the start of the digest and the end of the data
> +  *   region to be hashed.
> +  *
> +  * - The message length to cipher member of the rte_crypto_op_data
> +  *   structure must be equal to the overall length of the plain text,
> +  *   the digest length and any (optional) trailing data that is to be
> +  *   included.
> +  *
> +  * - The message length to cipher must be a multiple to the block
> +  *   size if a block cipher is being used - the implementation does not
> +  *   pad.
> +  */
> + RTE_CRYPTO_SYM_OPCHAIN_CIPHER_HASH,
> + /**<
> +  * Chain any cipher followed by any hash 

[dpdk-dev] [PATCH v2] Change rte_eal_vdev_init to update port_id

2015-08-20 Thread Ravi Kerur
On Wed, Aug 19, 2015 at 7:07 PM, Tetsuya Mukawa  wrote:

> On 2015/08/20 4:42, Ravi Kerur wrote:
> > v2:
> >> Remove rte_pmd_mpipe_devinit changes
> >> Use rte_eal_compare_pci_addr for address comparison
> >> Use dpdk_2.2 in version map file for new functions
> >
> > v1:
> > Changes include
> >> Modify rte_eal_vdev_init to return allocated port_id
> >> Modify rte_eal_probe_one to return allocated port_id
> >
> > 2. Removed following functions
> >> rte_eth_dev_save and
> >> rte_eth_dev_get_changed_port
> >
> > 3. Added 2 new functions
> >> rte_eth_dev_get_port_by_name
> >> rte_eth_dev_get_port_by_addr
> >
> > 4. Fix return error(ENOMEM) in function rte_pmd_mpipe_devinit
> >
> > Compiled on Linux for following targets
> >> x86_64-native-linuxapp-gcc
> >> x86_64-native-linuxapp-clang
> >> x86_x32-native-linuxapp-gcc
> >
> > Compiled on FreeBSD for following targets
> >> x86_64-native-bsdapp-clang
> >> x86_64-native-bsdapp-gcc
> >
> > Tested on Linux/FreeBSD:
> >> port attach eth_null
> >> port start all
> >> port stop all
> >> port close all
> >> port detach 0
> >> port attach eth_null
> >> port start all
> >> port stop all
> >> port close all
> >> port detach 0
> >
> > Successful run of checkpatch.pl on the diffs
> >
> > Successful validate_abi on Linux for following targets
> >
> >> x86_64-native-linuxapp-gcc
> >> x86_64-native-linuxapp-clang
> >
> > Signed-off-by: Ravi Kerur 
> > ---
> >  drivers/net/enic/enic_ethdev.c  |   2 +-
> >  lib/librte_eal/common/eal_common_dev.c  |  13 ++--
> >  lib/librte_eal/common/eal_common_pci.c  |   6 +-
> >  lib/librte_eal/common/include/rte_dev.h |  36 +-
> >  lib/librte_eal/common/include/rte_pci.h |   4 +-
> >  lib/librte_ether/rte_ethdev.c   | 122
> +---
> >  lib/librte_ether/rte_ether_version.map  |   8 +++
> >  7 files changed, 125 insertions(+), 66 deletions(-)
> >
> > diff --git a/drivers/net/enic/enic_ethdev.c
> b/drivers/net/enic/enic_ethdev.c
> > index 8280cea..472ef5a 100644
> > --- a/drivers/net/enic/enic_ethdev.c
> > +++ b/drivers/net/enic/enic_ethdev.c
> > @@ -36,8 +36,8 @@
> >  #include 
> >  #include 
> >
> > -#include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >
> > diff --git a/lib/librte_eal/common/eal_common_dev.c
> b/lib/librte_eal/common/eal_common_dev.c
> > index 4089d66..ffdb3b5 100644
> > --- a/lib/librte_eal/common/eal_common_dev.c
> > +++ b/lib/librte_eal/common/eal_common_dev.c
> > @@ -37,6 +37,7 @@
> >  #include 
> >  #include 
> >
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -64,7 +65,7 @@ rte_eal_driver_unregister(struct rte_driver *driver)
> >  }
> >
> >  int
> > -rte_eal_vdev_init(const char *name, const char *args)
> > +rte_eal_vdev_init(const char *name, const char *args, uint8_t *port_id)
> >  {
> >   struct rte_driver *driver;
> >
> > @@ -81,8 +82,12 @@ rte_eal_vdev_init(const char *name, const char *args)
> >* will be "eth_pcap", but "name" will be "eth_pcapN".
> >* So use strncmp to compare.
> >*/
> > - if (!strncmp(driver->name, name, strlen(driver->name)))
> > - return driver->init(name, args);
> > + if (!strncmp(driver->name, name, strlen(driver->name))) {
> > + if (!driver->init(name, args))
> > + return rte_eth_dev_get_port_by_name(
> > + name, port_id);
> > + }
> > +
>
> Please remove needless line.
>
> >   }
> >
> >   RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
> > @@ -108,7 +113,7 @@ rte_eal_dev_init(void)
> >   continue;
> >
> >   if (rte_eal_vdev_init(devargs->virtual.drv_name,
> > - devargs->args)) {
> > + devargs->args, NULL)) {
> >   RTE_LOG(ERR, EAL, "failed to initialize %s
> device\n",
> >   devargs->virtual.drv_name);
> >   return -1;
> > diff --git a/lib/librte_eal/common/eal_common_pci.c
> b/lib/librte_eal/common/eal_common_pci.c
> > index 16e8629..3d97892 100644
> > --- a/lib/librte_eal/common/eal_common_pci.c
> > +++ b/lib/librte_eal/common/eal_common_pci.c
> > @@ -79,6 +79,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  #include "eal_private.h"
> >
> > @@ -322,7 +323,7 @@ pci_detach_all_drivers(struct rte_pci_device *dev)
> >   * the driver of the devive.
> >   */
> >  int
> > -rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
> > +rte_eal_pci_probe_one(const struct rte_pci_addr *addr, uint8_t *port_id)
> >  {
> >   struct rte_pci_device *dev = NULL;
> >   int ret = 0;
> > @@ -337,7 +338,8 @@ rte_eal_pci_probe_one(const struct rte_pci_addr
> *addr)
> >   ret = pci_probe_all_driv

[dpdk-dev] [PATCH] app/test-pmd: Detect NUMA socket count

2015-08-20 Thread Stephen Hurd
Currently, there is a MAX_SOCKET macro which artificially limits the
number of NUMA sockets testpmd can use.  Anything on a higher socket
ends up using socket zero.  This patch replaces this with a variable
set during set_default_fwd_lcores_config() and uses RTE_MAX_NUMA_NODES
where a hard-coded max number of sockets is required.

Signed-off-by: Stephen Hurd 
---
 app/test-pmd/parameters.c | 12 ++--
 app/test-pmd/testpmd.c| 17 +
 app/test-pmd/testpmd.h|  3 +--
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index f1daa6e..fe78723 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -398,9 +398,9 @@ parse_portnuma_config(const char *q_arg)
return -1;
}
socket_id = (uint8_t)int_fld[FLD_SOCKET];
-   if(socket_id >= MAX_SOCKET) {
+   if(socket_id >= max_socket) {
printf("Invalid socket id, range is [0, %d]\n",
-MAX_SOCKET - 1);
+max_socket - 1);
return -1;
}
port_numa[port_id] = socket_id;
@@ -458,9 +458,9 @@ parse_ringnuma_config(const char *q_arg)
return -1;
}
socket_id = (uint8_t)int_fld[FLD_SOCKET];
-   if (socket_id >= MAX_SOCKET) {
+   if (socket_id >= max_socket) {
printf("Invalid socket id, range is [0, %d]\n",
-   MAX_SOCKET - 1);
+   max_socket - 1);
return -1;
}
ring_flag = (uint8_t)int_fld[FLD_FLAG];
@@ -667,12 +667,12 @@ launch_args_parse(int argc, char** argv)
   "invalid ring-numa configuration\n");
if (!strcmp(lgopts[opt_idx].name, "socket-num")) {
n = atoi(optarg);
-   if(n < MAX_SOCKET)
+   if((uint8_t)n < max_socket)
socket_num = (uint8_t)n;
else
rte_exit(EXIT_FAILURE,
"The socket number should be < 
%d\n",
-   MAX_SOCKET);
+   max_socket);
}
if (!strcmp(lgopts[opt_idx].name, "mbuf-size")) {
n = atoi(optarg);
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 386bf84..2578b6b 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -313,6 +313,8 @@ struct queue_stats_mappings *rx_queue_stats_mappings = 
rx_queue_stats_mappings_a
 uint16_t nb_tx_queue_stats_mappings = 0;
 uint16_t nb_rx_queue_stats_mappings = 0;

+unsigned max_socket = 0;
+
 /* Forward function declarations */
 static void map_port_queue_stats_mapping_registers(uint8_t pi, struct rte_port 
*port);
 static void check_all_ports_link_status(uint32_t port_mask);
@@ -345,6 +347,7 @@ set_default_fwd_lcores_config(void)
 {
unsigned int i;
unsigned int nb_lc;
+   unsigned int sock_num;

nb_lc = 0;
for (i = 0; i < RTE_MAX_LCORE; i++) {
@@ -353,6 +356,12 @@ set_default_fwd_lcores_config(void)
if (i == rte_get_master_lcore())
continue;
fwd_lcores_cpuids[nb_lc++] = i;
+   sock_num = rte_lcore_to_socket_id(i) + 1;
+   if (sock_num > max_socket) {
+   if (sock_num > RTE_MAX_NUMA_NODES)
+   rte_exit(EXIT_FAILURE, "Total sockets greater 
than %u\n", RTE_MAX_NUMA_NODES);
+   max_socket = sock_num;
+   }
}
nb_lcores = (lcoreid_t) nb_lc;
nb_cfg_lcores = nb_lcores;
@@ -446,7 +455,7 @@ check_socket_id(const unsigned int socket_id)
 {
static int warning_once = 0;

-   if (socket_id >= MAX_SOCKET) {
+   if (socket_id >= max_socket) {
if (!warning_once && numa_support)
printf("Warning: NUMA should be configured manually by"
   " using --port-numa-config and"
@@ -466,9 +475,9 @@ init_config(void)
struct rte_mempool *mbp;
unsigned int nb_mbuf_per_pool;
lcoreid_t  lc_id;
-   uint8_t port_per_socket[MAX_SOCKET];
+   uint8_t port_per_socket[RTE_MAX_NUMA_NODES];

-   memset(port_per_socket,0,MAX_SOCKET);
+   memset(port_per_socket,0,RTE_MAX_NUMA_NODES);
/* Configuration of logical cores. */
fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
sizeof(struct fwd_lcore *) * nb_lcores,
@@ -545,7 +554,7 @@ init_config(void)