[dpdk-dev] [PATCH] net/sfc: fix filter exceptions logic

2018-07-14 Thread Andrew Rybchenko
From: Igor Romanov 

Now exception logic handles these cases:

When FW variant does not support filters with transport ports, but
IP protocol filters are supported, TCP/UDP protocol filters may be
used. When FW variant does not support filters with IPv4/6 addresses
or IP protocol, but filters with EtherType are supported, IPv4 and
IPv6 EtherTypes may be used.

Fixes: 096dba799b4a ("net/sfc: avoid creation of ineffective flow rules")
Cc: sta...@dpdk.org

Signed-off-by: Igor Romanov 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc_filter.c | 14 ++
 drivers/net/sfc/sfc_filter.h | 10 ++
 drivers/net/sfc/sfc_flow.c   | 17 +++--
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/drivers/net/sfc/sfc_filter.c b/drivers/net/sfc/sfc_filter.c
index 77e2ea562..6ff380a36 100644
--- a/drivers/net/sfc/sfc_filter.c
+++ b/drivers/net/sfc/sfc_filter.c
@@ -75,6 +75,7 @@ int
 sfc_filter_attach(struct sfc_adapter *sa)
 {
int rc;
+   unsigned int i;
 
sfc_log_init(sa, "entry");
 
@@ -88,6 +89,19 @@ sfc_filter_attach(struct sfc_adapter *sa)
 
efx_filter_fini(sa->nic);
 
+   sa->filter.supports_ip_proto_or_addr_filter = B_FALSE;
+   sa->filter.supports_rem_or_local_port_filter = B_FALSE;
+   for (i = 0; i < sa->filter.supported_match_num; ++i) {
+   if (sa->filter.supported_match[i] &
+   (EFX_FILTER_MATCH_IP_PROTO | EFX_FILTER_MATCH_LOC_HOST |
+EFX_FILTER_MATCH_REM_HOST))
+   sa->filter.supports_ip_proto_or_addr_filter = B_TRUE;
+
+   if (sa->filter.supported_match[i] &
+   (EFX_FILTER_MATCH_LOC_PORT | EFX_FILTER_MATCH_REM_PORT))
+   sa->filter.supports_rem_or_local_port_filter = B_TRUE;
+   }
+
sfc_log_init(sa, "done");
 
return 0;
diff --git a/drivers/net/sfc/sfc_filter.h b/drivers/net/sfc/sfc_filter.h
index d3e1c2f9c..64ab114e0 100644
--- a/drivers/net/sfc/sfc_filter.h
+++ b/drivers/net/sfc/sfc_filter.h
@@ -25,6 +25,16 @@ struct sfc_filter {
uint32_t*supported_match;
/** List of flow rules */
struct sfc_flow_listflow_list;
+   /**
+* Supports any of ip_proto, remote host or local host
+* filters. This flag is used for filter match exceptions
+*/
+   boolean_t   supports_ip_proto_or_addr_filter;
+   /**
+* Supports any of remote port or local port filters.
+* This flag is used for filter match exceptions
+*/
+   boolean_t   supports_rem_or_local_port_filter;
 };
 
 struct sfc_adapter;
diff --git a/drivers/net/sfc/sfc_flow.c b/drivers/net/sfc/sfc_flow.c
index 18387415e..bfb7b24f0 100644
--- a/drivers/net/sfc/sfc_flow.c
+++ b/drivers/net/sfc/sfc_flow.c
@@ -2095,11 +2095,14 @@ sfc_flow_is_match_with_vids(efx_filter_match_flags_t 
match_flags,
  * Check whether the spec maps to a hardware filter which is known to be
  * ineffective despite being valid.
  *
+ * @param filter[in]
+ *   SFC filter with list of supported filters.
  * @param spec[in]
  *   SFC flow specification.
  */
 static boolean_t
-sfc_flow_is_match_flags_exception(struct sfc_flow_spec *spec)
+sfc_flow_is_match_flags_exception(struct sfc_filter *filter,
+ struct sfc_flow_spec *spec)
 {
unsigned int i;
uint16_t ether_type;
@@ -2115,8 +2118,9 @@ sfc_flow_is_match_flags_exception(struct sfc_flow_spec 
*spec)
EFX_FILTER_MATCH_ETHER_TYPE |
EFX_FILTER_MATCH_LOC_MAC)) {
ether_type = spec->filters[i].efs_ether_type;
-   if (ether_type == EFX_ETHER_TYPE_IPV4 ||
-   ether_type == EFX_ETHER_TYPE_IPV6)
+   if (filter->supports_ip_proto_or_addr_filter &&
+   (ether_type == EFX_ETHER_TYPE_IPV4 ||
+ether_type == EFX_ETHER_TYPE_IPV6))
return B_TRUE;
} else if (sfc_flow_is_match_with_vids(match_flags,
EFX_FILTER_MATCH_ETHER_TYPE |
@@ -2126,8 +2130,9 @@ sfc_flow_is_match_flags_exception(struct sfc_flow_spec 
*spec)
EFX_FILTER_MATCH_IP_PROTO |
EFX_FILTER_MATCH_LOC_MAC)) {
ip_proto = spec->filters[i].efs_ip_proto;
-   if (ip_proto == EFX_IPPROTO_TCP ||
-   ip_proto == EFX_IPPROTO_UDP)
+   if (filter->supports_rem_or_local_port_filter &&
+   (ip_proto == EFX_IPPROTO_TCP ||
+ip_proto == EFX_IPPROTO_UDP))
return B_TRUE;
}
}
@@ -2154,7 +2159,7 @@ sfc_flow_validate_match_flags(struct sfc_a

[dpdk-dev] [PATCH] net/sfc: fallback to filter with zero vid

2018-07-14 Thread Andrew Rybchenko
From: Igor Romanov 

Fall back to a filter with VLAN=0 if a match without VLAN is not supported.
Strictly speaking, it is not 100% equivalent, but it is a good tradeoff:
untagged and priority-only tagged frames will match.

Signed-off-by: Igor Romanov 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc_flow.c | 75 ++
 1 file changed, 75 insertions(+)

diff --git a/drivers/net/sfc/sfc_flow.c b/drivers/net/sfc/sfc_flow.c
index 18387415e..32305ac98 100644
--- a/drivers/net/sfc/sfc_flow.c
+++ b/drivers/net/sfc/sfc_flow.c
@@ -93,6 +93,8 @@ static sfc_flow_spec_check sfc_flow_check_unknown_dst_flags;
 static sfc_flow_spec_set_vals sfc_flow_set_ethertypes;
 static sfc_flow_spec_set_vals sfc_flow_set_ifrm_unknown_dst_flags;
 static sfc_flow_spec_check sfc_flow_check_ifrm_unknown_dst_flags;
+static sfc_flow_spec_set_vals sfc_flow_set_outer_vid_flag;
+static sfc_flow_spec_check sfc_flow_check_outer_vid_flag;
 
 static boolean_t
 sfc_flow_is_zero(const uint8_t *buf, unsigned int size)
@@ -1780,6 +1782,43 @@ sfc_flow_set_ethertypes(struct sfc_flow_spec *spec,
return 0;
 }
 
+/**
+ * Set the EFX_FILTER_MATCH_OUTER_VID match flag with value 0
+ * in the same specifications after copying.
+ *
+ * @param spec[in, out]
+ *   SFC flow specification to update.
+ * @param filters_count_for_one_val[in]
+ *   How many specifications should have the same match flag, what is the
+ *   number of specifications before copying.
+ * @param error[out]
+ *   Perform verbose error reporting if not NULL.
+ */
+static int
+sfc_flow_set_outer_vid_flag(struct sfc_flow_spec *spec,
+   unsigned int filters_count_for_one_val,
+   struct rte_flow_error *error)
+{
+   unsigned int i;
+
+   if (filters_count_for_one_val != spec->count) {
+   rte_flow_error_set(error, EINVAL,
+   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+   "Number of specifications is incorrect "
+   "while copying by outer VLAN ID");
+   return -rte_errno;
+   }
+
+   for (i = 0; i < spec->count; i++) {
+   spec->filters[i].efs_match_flags |=
+   EFX_FILTER_MATCH_OUTER_VID;
+
+   spec->filters[i].efs_outer_vid = 0;
+   }
+
+   return 0;
+}
+
 /**
  * Set the EFX_FILTER_MATCH_IFRM_UNKNOWN_UCAST_DST and
  * EFX_FILTER_MATCH_IFRM_UNKNOWN_MCAST_DST match flags in the same
@@ -1859,6 +1898,36 @@ 
sfc_flow_check_ifrm_unknown_dst_flags(efx_filter_match_flags_t match,
return B_FALSE;
 }
 
+/**
+ * Check that the list of supported filters has a filter that differs
+ * from @p match in that it has no flag EFX_FILTER_MATCH_OUTER_VID
+ * in this case that filter will be used and the flag
+ * EFX_FILTER_MATCH_OUTER_VID is not needed.
+ *
+ * @param match[in]
+ *   The match flags of filter.
+ * @param spec[in]
+ *   Specification to be supplemented.
+ * @param filter[in]
+ *   SFC filter with list of supported filters.
+ */
+static boolean_t
+sfc_flow_check_outer_vid_flag(efx_filter_match_flags_t match,
+ __rte_unused efx_filter_spec_t *spec,
+ struct sfc_filter *filter)
+{
+   unsigned int i;
+   efx_filter_match_flags_t match_without_vid =
+   match & ~EFX_FILTER_MATCH_OUTER_VID;
+
+   for (i = 0; i < filter->supported_match_num; i++) {
+   if (match_without_vid == filter->supported_match[i])
+   return B_FALSE;
+   }
+
+   return B_TRUE;
+}
+
 /*
  * Match flags that can be automatically added to filters.
  * Selecting the last minimum when searching for the copy flag ensures that the
@@ -1886,6 +1955,12 @@ static const struct sfc_flow_copy_flag 
sfc_flow_copy_flags[] = {
.set_vals = sfc_flow_set_ifrm_unknown_dst_flags,
.spec_check = sfc_flow_check_ifrm_unknown_dst_flags,
},
+   {
+   .flag = EFX_FILTER_MATCH_OUTER_VID,
+   .vals_count = 1,
+   .set_vals = sfc_flow_set_outer_vid_flag,
+   .spec_check = sfc_flow_check_outer_vid_flag,
+   },
 };
 
 /* Get item from array sfc_flow_copy_flags */
-- 
2.17.1



Re: [dpdk-dev] [PATCH] net/thunderx: add support for Rx VLAN offload

2018-07-14 Thread Andrew Rybchenko

On 13.07.2018 17:16, rkudurumalla wrote:

On 07/04/2018 11:06 PM, Ferruh Yigit wrote:

External Email

On 7/1/2018 5:46 PM, Pavan Nikhilesh wrote:

From: "Kudurumalla, Rakesh" 

This feature is used to offload stripping of the VLAN header from received
packets and update the vlan_tci field in the mbuf when the
DEV_RX_OFFLOAD_VLAN_STRIP & ETH_VLAN_STRIP_MASK flag is set.

Signed-off-by: Rakesh Kudurumalla 
Signed-off-by: Pavan Nikhilesh 
---
  drivers/net/thunderx/base/nicvf_hw.c |  1 +
  drivers/net/thunderx/nicvf_ethdev.c  | 59 +--
  drivers/net/thunderx/nicvf_rxtx.c| 70 
  drivers/net/thunderx/nicvf_rxtx.h| 15 --
  drivers/net/thunderx/nicvf_struct.h  |  1 +

In thunderx.ini, "VLAN offload" is already marked as P (Partial). Is it still
partial? Why?

It is still partial because Tx VLAN offload (insertion of the VLAN header
into Tx packets) is yet to be implemented.

<...>


@@ -1590,9 +1595,9 @@ nicvf_vf_start(struct rte_eth_dev *dev, struct nicvf 
*nic, uint32_t rbdrsz)
nic->rbdr->tail, nb_rbdr_desc, nic->vf_id);

   /* Configure VLAN Strip */
- vlan_strip = !!(dev->data->dev_conf.rxmode.offloads &
- DEV_RX_OFFLOAD_VLAN_STRIP);
- nicvf_vlan_hw_strip(nic, vlan_strip);
+ mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
+ ETH_VLAN_EXTEND_MASK;

You don't need anything more than ETH_VLAN_STRIP_MASK but agreed no issue add
more if you prefer.


+ ret = nicvf_vlan_offload_config(dev, mask);

   /* Based on the packet type(IPv4 or IPv6), the nicvf HW aligns L3 data
* to the 64bit memory address.
@@ -1983,6 +1988,7 @@ static const struct eth_dev_ops nicvf_eth_dev_ops = {
   .dev_infos_get= nicvf_dev_info_get,
   .dev_supported_ptypes_get = nicvf_dev_supported_ptypes_get,
   .mtu_set  = nicvf_dev_set_mtu,
+ .vlan_offload_set = nicvf_vlan_offload_set,

Not related to this patch but I believe this name 'vlan_offload_set' is
confusing, it enable/disable VLAN related config:
- vlan strip offload
- vlan filtering package (drop/accept specific vlans)
- double vlan feature (not offload if I am not missing anything)
We can think about a more proper name later...

Also rte_eth_dev_set_vlan_offload() API may have a defect, it seems not taking
capability flags into account, cc'ed Shahaf and Andrew for information.


Yes, the function could check if corresponding offloads are supported.

Right now we have a unified interface to control offloads on device configure
and queue setup. This API to change VLAN offloads looks really legacy now.
If we really need an API to control offloads at run time (I'm not sure), it
should be a generic API for all offloads, with corresponding information in
dev_info which specifies which offloads are controllable at run time.


And I have a question about DEV_TX_OFFLOAD_VLAN_INSERT, perhaps goes to Olivier,
if DEV_TX_OFFLOAD_VLAN_INSERT enabled what is the correct way to provide
vlan_tci to insert?
And do we need something like PKT_RX_VLAN_INSERT and use mbuf->vlan_tci value to
have the ability to insert VLAN to some packets?


As I understand, ol_flags should have PKT_TX_VLAN. Right now the description
does not mention it; however, the description for double-tagged insertion
PKT_TX_QINQ does.



Re: [dpdk-dev] DPDK 18.05 only works with up to 4 NUMAs systems

2018-07-14 Thread Kumar, Ravi1
>On 28-Jun-18 8:03 AM, Kumar, Ravi1 wrote:

>>> On 22-Jun-18 5:37 PM, Kumar, Ravi1 wrote:

 Hi,



 As the memory subsystem in DPDK 18.05 is reworked, it has introduced a 
 problem for AMD EPYC 2P platforms.

 The issue is that DPDK 18.05 only works with up to 4 NUMAs. For AMD EPYC 
 2P platforms, DPDK now only works with P0 (NUMA 0-3) and does not work 
 with P1 (NUMA 4-7).



 The problem can be fixed by reducing some of the default settings of the 
 memory subsystem.



 To solve this issue:

 -  We can create our own config file for our integrated 10G 
 NIC, that is for amd_xgbe PMD. This will make amd_xgbe immune to this 
 problem.

 -  However, when any other NIC (Intel, Mellanox, Cavium or 
 Broadcom etc.) is plugged into NUMA 4-7, the problem will still be exposed.

 -  If we only fix it in "config/common_base", it will cover 
 all cases.



 Our current workaround is:

 Edit config file "./config/common_base" and change the following line

   CONFIG_RTE_MAX_MEM_MB_PER_TYPE=131072

 TO

   CONFIG_RTE_MAX_MEM_MB_PER_TYPE=65536



 Any better solution for this issue is welcome.



 We would appreciate if this issue can be fixed in the next release (18.08) 
 so the STOCK version of DPDK works on AMD EPYC 2P platforms.



 Regards,

 Ravi



>>>

>>> Hi Ravi,

>>>

>>> What is the reason behind this limitation? Is it too much virtual memory 
>>> being preallocated?

>>>

>>> --

>>> Thanks,

>>> Anatoly

>>>

>> Hi Anatoly,

>>

>> We believe this is true.  By default, too much virtual memory is being 
>> preallocated. The result is it can only support up to 4 NUMAs.

>>

>> Our workaround is to reduce the amount of preallocated virtual memory by 
>> half, so to support up to 8 NUMAs.

>>

>> Regards,

>> Ravi

>>

>

>I assume you see a bunch of failed mmap() calls with ENOMEM?

>

>In general, changing base config that way is an OK change, and it won't even 
>be an ABI break since this memory is allocated at runtime. I just want to make 
>sure that we fix the underlying problem, rather than the symptom.

>

>--

>Thanks,

>Anatoly

Hi Anatoly,



Sorry for the late reply. I have been away and it took me some time to get the
logs.



Here are some more details.



Dpdk-18.05/config/common_base contains the constants used to configure the 
memory subsystem.



CONFIG_RTE_MAX_NUMA_NODES=8

CONFIG_RTE_MAX_MEMSEG_LISTS=64

# each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages

# or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller

CONFIG_RTE_MAX_MEMSEG_PER_LIST=8192

CONFIG_RTE_MAX_MEM_MB_PER_LIST=32768

# a "type" is a combination of page size and NUMA node. total number of memseg

# lists per type will be limited to either RTE_MAX_MEMSEG_PER_TYPE pages (split

# over multiple lists of RTE_MAX_MEMSEG_PER_LIST pages), or

# RTE_MAX_MEM_MB_PER_TYPE megabytes of memory (split over multiple lists of

# RTE_MAX_MEM_MB_PER_LIST), whichever is smaller

CONFIG_RTE_MAX_MEMSEG_PER_TYPE=32768

CONFIG_RTE_MAX_MEM_MB_PER_TYPE=131072

# global maximum usable amount of VA, in megabytes

CONFIG_RTE_MAX_MEM_MB=524288



From the documentation.

Dpdk-18.05/doc/guides/prog_guide/env_abstraction_layer.rst



All possible virtual memory space that can ever be used for hugepage mapping in 
a DPDK process is preallocated at startup, thereby placing an upper limit on 
how much memory a DPDK application can have. DPDK memory is stored in segment 
lists, each segment is strictly one physical page. It is possible to change the 
amount of virtual memory being preallocated at startup by editing the following 
config variables:



* ``CONFIG_RTE_MAX_MEMSEG_LISTS`` controls how many segment lists can DPDK have

* ``CONFIG_RTE_MAX_MEM_MB_PER_LIST`` controls how much megabytes of memory each 
segment list can address

* ``CONFIG_RTE_MAX_MEMSEG_PER_LIST`` controls how many segments each segment
list can have

* ``CONFIG_RTE_MAX_MEMSEG_PER_TYPE`` controls how many segments each memory type
can have (where "type" is defined as "page size + NUMA node" combination)

* ``CONFIG_RTE_MAX_MEM_MB_PER_TYPE`` controls how much megabytes of memory each 
memory type can address

* ``CONFIG_RTE_MAX_MEM_MB`` places a global maximum on the amount of memory 
DPDK can reserve



Normally, these options do not need to be changed.



.. note::



Preallocated virtual memory is not to be confused with preallocated hugepage 
memory! All DPDK processes preallocate virtual memory at startup. Hugepages  
can later be mapped into that preallocated VA space (if dynamic memory mode is 
enabled), and can optionally be mapped into it at startup.



Memory setup with 2M pages works with the default configuration.  With the 
default configuration and 2M hugepages



1.Total amount of memory for each NUMA zone does not e

Re: [dpdk-dev] [PATCH v11 07/25] eal: introduce device class abstraction

2018-07-14 Thread Thomas Monjalon
12/07/2018 09:41, Gaëtan Rivet:
> On Thu, Jul 12, 2018 at 12:19:09PM +0530, Shreyansh Jain wrote:
> > Any reason you don't want the rte_class_find and rte_class_find_by_name as
> > exposed APIs? There is no experimental tag on these APIs either.
> 
> No actually I just overlooked that part! Thanks for catching this, I
> think it should be exposed and tagged experimental.

Fixup below:

--- a/lib/librte_eal/common/eal_common_class.c
+++ b/lib/librte_eal/common/eal_common_class.c
@@ -29,6 +29,7 @@ rte_class_unregister(struct rte_class *class)
RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name);
 }
 
+__rte_experimental
 struct rte_class *
 rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
   const void *data)
@@ -55,6 +56,7 @@ cmp_class_name(const struct rte_class *class, const void 
*_name)
return strcmp(class->name, name);
 }
 
+__rte_experimental
 struct rte_class *
 rte_class_find_by_name(const char *name)
 {

--- a/lib/librte_eal/common/include/rte_class.h
+++ b/lib/librte_eal/common/include/rte_class.h
@@ -76,6 +76,7 @@ typedef int (*rte_class_cmp_t)(const struct rte_class *cls, 
const void *data);
  * @return
  *  A pointer to a rte_class structure or NULL in case no class matches
  */
+__rte_experimental
 struct rte_class *
 rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
   const void *data);
@@ -83,6 +84,7 @@ rte_class_find(const struct rte_class *start, rte_class_cmp_t 
cmp,
 /**
  * Find the registered class for a given name.
  */
+__rte_experimental
 struct rte_class *
 rte_class_find_by_name(const char *name);
 

--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -254,6 +254,8 @@ DPDK_18.08 {
 EXPERIMENTAL {
global:
 
+   rte_class_find;
+   rte_class_find_by_name;
rte_class_register;
rte_class_unregister;
rte_ctrl_thread_create;





Re: [dpdk-dev] [PATCH v11 08/25] devargs: add function to parse device layers

2018-07-14 Thread Thomas Monjalon
11/07/2018 23:44, Gaetan Rivet:
> This function is private to the EAL.
> It is used to parse each layer in a device description string,
> and store the result in an rte_devargs structure.
> 
> Signed-off-by: Gaetan Rivet 

There is a compilation issue.
It makes EAL depend on kvargs.
This change is required (to be squashed):

--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -18,6 +18,7 @@ CFLAGS += $(WERROR_FLAGS) -O3
 LDLIBS += -lexecinfo
 LDLIBS += -lpthread
 LDLIBS += -lgcc_s
+LDLIBS += -lrte_kvargs
 
 EXPORT_MAP := ../../rte_eal_version.map
 

--- a/lib/librte_eal/bsdapp/eal/meson.build
+++ b/lib/librte_eal/bsdapp/eal/meson.build
@@ -16,3 +16,5 @@ env_sources = files('eal_alarm.c',
'eal_memory.c',
'eal_dev.c'
 )
+
+deps += ['kvargs']

--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -24,6 +24,7 @@ LDLIBS += -ldl
 LDLIBS += -lpthread
 LDLIBS += -lgcc_s
 LDLIBS += -lrt
+LDLIBS += -lrte_kvargs
 ifeq ($(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),y)
 LDLIBS += -lnuma
 endif

--- a/lib/librte_eal/linuxapp/eal/meson.build
+++ b/lib/librte_eal/linuxapp/eal/meson.build
@@ -23,6 +23,7 @@ env_sources = files('eal_alarm.c',
'eal_dev.c',
 )
 
+deps += ['kvargs']
 if has_libnuma == 1
dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true)
 endif





[dpdk-dev] [Bug 5] This is a Bug, Please Disabled Create new Account

2018-07-14 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=5

Ajit Khaparde (ajit.khapa...@broadcom.com) changed:

   What|Removed |Added

 Status|CONFIRMED   |RESOLVED
 CC||ajit.khapa...@broadcom.com
 Resolution|--- |WONTFIX

--- Comment #1 from Ajit Khaparde (ajit.khapa...@broadcom.com) ---
Closing for lack of information.

-- 
You are receiving this mail because:
You are the assignee for the bug.