[dpdk-dev] [PATCH v2] net/ice: refactor the protocol extraction design

2020-10-26 Thread Haiyue Wang
Change the protocol extraction dynamic mbuf usage from register API to
lookup API, so the application can decide to read the metadata or not
at the run time, in other words, PMD will check this at Rx queue start
time.

This design makes the API simple now: it just needs to export the name
string, not the whole dynamic mbuf data objects.

Signed-off-by: Haiyue Wang 
---
v2: update the commit message, doc; and add the error handling for
dynamic mbuf lookup. Also keep the metadata format definition.
---
 doc/guides/nics/ice.rst   |  16 ++--
 drivers/net/ice/ice_ethdev.c  | 117 +--
 drivers/net/ice/ice_ethdev.h  |   1 +
 drivers/net/ice/ice_rxtx.c|  62 
 drivers/net/ice/ice_rxtx.h|   1 +
 drivers/net/ice/rte_pmd_ice.h | 171 +++---
 drivers/net/ice/version.map   |  13 ---
 7 files changed, 106 insertions(+), 275 deletions(-)

diff --git a/doc/guides/nics/ice.rst b/doc/guides/nics/ice.rst
index a2aea12333..9878b665b3 100644
--- a/doc/guides/nics/ice.rst
+++ b/doc/guides/nics/ice.rst
@@ -156,19 +156,17 @@ Runtime Config Options
+++
|   IPHDR2   |   IPHDR1   |
+++
-   |   IPv6 HDR Offset  |   IPv4 HDR Offset  |
+   | Reserved   |   IP Header Offset |
+++
 
-  IPHDR1 - Outer/Single IPv4 Header offset.
+  IPHDR1 - Outer/Single IPv4/IPv6 Header offset.
 
-  IPHDR2 - Outer/Single IPv6 Header offset.
+  IPHDR2 - Reserved.
 
-  Use ``rte_net_ice_dynf_proto_xtr_metadata_get`` to access the protocol
-  extraction metadata, and use ``RTE_PKT_RX_DYNF_PROTO_XTR_*`` to get the
-  metadata type of ``struct rte_mbuf::ol_flags``.
-
-  The ``rte_net_ice_dump_proto_xtr_metadata`` routine shows how to
-  access the protocol extraction result in ``struct rte_mbuf``.
+  The dynamic mbuf field for metadata uses 
"rte_pmd_dynfield_proto_xtr_metadata"
+  name with 4 byte size. And the related dynamic mbuf flag uses the name format
+  "rte_pmd_dynflag_proto_xtr_*" which ends with the protocol extraction devargs
+  name such as "ip_offset".
 
 Driver compilation and testing
 --
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 51b99c6506..9e7d71ae4d 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -32,42 +32,6 @@ static const char * const ice_valid_args[] = {
NULL
 };
 
-static const struct rte_mbuf_dynfield ice_proto_xtr_metadata_param = {
-   .name = "ice_dynfield_proto_xtr_metadata",
-   .size = sizeof(uint32_t),
-   .align = __alignof__(uint32_t),
-   .flags = 0,
-};
-
-struct proto_xtr_ol_flag {
-   const struct rte_mbuf_dynflag param;
-   uint64_t *ol_flag;
-   bool required;
-};
-
-static bool ice_proto_xtr_hw_support[PROTO_XTR_MAX];
-
-static struct proto_xtr_ol_flag ice_proto_xtr_ol_flag_params[] = {
-   [PROTO_XTR_VLAN] = {
-   .param = { .name = "ice_dynflag_proto_xtr_vlan" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_vlan_mask },
-   [PROTO_XTR_IPV4] = {
-   .param = { .name = "ice_dynflag_proto_xtr_ipv4" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_ipv4_mask },
-   [PROTO_XTR_IPV6] = {
-   .param = { .name = "ice_dynflag_proto_xtr_ipv6" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_ipv6_mask },
-   [PROTO_XTR_IPV6_FLOW] = {
-   .param = { .name = "ice_dynflag_proto_xtr_ipv6_flow" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_ipv6_flow_mask },
-   [PROTO_XTR_TCP] = {
-   .param = { .name = "ice_dynflag_proto_xtr_tcp" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_tcp_mask },
-   [PROTO_XTR_IP_OFFSET] = {
-   .param = { .name = "ice_dynflag_proto_xtr_ip_offset" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_ip_offset_mask },
-};
-
 #define ICE_DFLT_OUTER_TAG_TYPE ICE_AQ_VSI_OUTER_TAG_VLAN_9100
 
 #define ICE_OS_DEFAULT_PKG_NAME"ICE OS Default Package"
@@ -542,7 +506,7 @@ handle_proto_xtr_arg(__rte_unused const char *key, const 
char *value,
 }
 
 static void
-ice_check_proto_xtr_support(struct ice_hw *hw)
+ice_check_proto_xtr_support(struct ice_pf *pf, struct ice_hw *hw)
 {
 #define FLX_REG(val, fld, idx) \
(((val) & GLFLXP_RXDID_FLX_WRD_##idx##_##fld##_M) >> \
@@ -587,7 +551,7 @@ ice_check_proto_xtr_support(struct ice_hw *hw)
 
if (FLX_REG(v, PROT_MDID, 4) == xtr_sets[i].protid_0 &&
FLX_REG(v, RXDID_OPCODE, 4) == xtr_sets[i].opcode)
-   ice_proto_xtr_hw_support[i] = true;
+   pf->hw_proto_xtr_ena[i] = 1;
}
 
if (xtr_sets[i].protid_1 != I

[dpdk-dev] [PATCH v3] net/ice: refactor the protocol extraction design

2020-10-26 Thread Haiyue Wang
Change the protocol extraction dynamic mbuf usage from register API to
lookup API, so the application can decide to read the metadata or not
at the run time, in other words, PMD will check this at Rx queue start
time.

This design makes the API simple now: it just needs to export the name
string, not the whole dynamic mbuf data objects.

Signed-off-by: Haiyue Wang 
---
v3: Fix 'regiser' typo in commit message.

v2: update the commit message, doc; and add the error handling for
dynamic mbuf lookup. Also keep the metadata format definition.
---
 doc/guides/nics/ice.rst   |  16 ++--
 drivers/net/ice/ice_ethdev.c  | 117 +--
 drivers/net/ice/ice_ethdev.h  |   1 +
 drivers/net/ice/ice_rxtx.c|  62 
 drivers/net/ice/ice_rxtx.h|   1 +
 drivers/net/ice/rte_pmd_ice.h | 171 +++---
 drivers/net/ice/version.map   |  13 ---
 7 files changed, 106 insertions(+), 275 deletions(-)

diff --git a/doc/guides/nics/ice.rst b/doc/guides/nics/ice.rst
index a2aea12333..9878b665b3 100644
--- a/doc/guides/nics/ice.rst
+++ b/doc/guides/nics/ice.rst
@@ -156,19 +156,17 @@ Runtime Config Options
+++
|   IPHDR2   |   IPHDR1   |
+++
-   |   IPv6 HDR Offset  |   IPv4 HDR Offset  |
+   | Reserved   |   IP Header Offset |
+++
 
-  IPHDR1 - Outer/Single IPv4 Header offset.
+  IPHDR1 - Outer/Single IPv4/IPv6 Header offset.
 
-  IPHDR2 - Outer/Single IPv6 Header offset.
+  IPHDR2 - Reserved.
 
-  Use ``rte_net_ice_dynf_proto_xtr_metadata_get`` to access the protocol
-  extraction metadata, and use ``RTE_PKT_RX_DYNF_PROTO_XTR_*`` to get the
-  metadata type of ``struct rte_mbuf::ol_flags``.
-
-  The ``rte_net_ice_dump_proto_xtr_metadata`` routine shows how to
-  access the protocol extraction result in ``struct rte_mbuf``.
+  The dynamic mbuf field for metadata uses 
"rte_pmd_dynfield_proto_xtr_metadata"
+  name with 4 byte size. And the related dynamic mbuf flag uses the name format
+  "rte_pmd_dynflag_proto_xtr_*" which ends with the protocol extraction devargs
+  name such as "ip_offset".
 
 Driver compilation and testing
 --
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 51b99c6506..9e7d71ae4d 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -32,42 +32,6 @@ static const char * const ice_valid_args[] = {
NULL
 };
 
-static const struct rte_mbuf_dynfield ice_proto_xtr_metadata_param = {
-   .name = "ice_dynfield_proto_xtr_metadata",
-   .size = sizeof(uint32_t),
-   .align = __alignof__(uint32_t),
-   .flags = 0,
-};
-
-struct proto_xtr_ol_flag {
-   const struct rte_mbuf_dynflag param;
-   uint64_t *ol_flag;
-   bool required;
-};
-
-static bool ice_proto_xtr_hw_support[PROTO_XTR_MAX];
-
-static struct proto_xtr_ol_flag ice_proto_xtr_ol_flag_params[] = {
-   [PROTO_XTR_VLAN] = {
-   .param = { .name = "ice_dynflag_proto_xtr_vlan" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_vlan_mask },
-   [PROTO_XTR_IPV4] = {
-   .param = { .name = "ice_dynflag_proto_xtr_ipv4" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_ipv4_mask },
-   [PROTO_XTR_IPV6] = {
-   .param = { .name = "ice_dynflag_proto_xtr_ipv6" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_ipv6_mask },
-   [PROTO_XTR_IPV6_FLOW] = {
-   .param = { .name = "ice_dynflag_proto_xtr_ipv6_flow" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_ipv6_flow_mask },
-   [PROTO_XTR_TCP] = {
-   .param = { .name = "ice_dynflag_proto_xtr_tcp" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_tcp_mask },
-   [PROTO_XTR_IP_OFFSET] = {
-   .param = { .name = "ice_dynflag_proto_xtr_ip_offset" },
-   .ol_flag = &rte_net_ice_dynflag_proto_xtr_ip_offset_mask },
-};
-
 #define ICE_DFLT_OUTER_TAG_TYPE ICE_AQ_VSI_OUTER_TAG_VLAN_9100
 
 #define ICE_OS_DEFAULT_PKG_NAME"ICE OS Default Package"
@@ -542,7 +506,7 @@ handle_proto_xtr_arg(__rte_unused const char *key, const 
char *value,
 }
 
 static void
-ice_check_proto_xtr_support(struct ice_hw *hw)
+ice_check_proto_xtr_support(struct ice_pf *pf, struct ice_hw *hw)
 {
 #define FLX_REG(val, fld, idx) \
(((val) & GLFLXP_RXDID_FLX_WRD_##idx##_##fld##_M) >> \
@@ -587,7 +551,7 @@ ice_check_proto_xtr_support(struct ice_hw *hw)
 
if (FLX_REG(v, PROT_MDID, 4) == xtr_sets[i].protid_0 &&
FLX_REG(v, RXDID_OPCODE, 4) == xtr_sets[i].opcode)
-   ice_proto_xtr_hw_support[i] = true;
+   pf->hw_proto_xtr_ena[i] = 1;
}
 
 

Re: [dpdk-dev] [PATCH v4 1/3] net/ice: add AVX512 vector path

2020-10-26 Thread Rong, Leyi

> -Original Message-
> From: David Marchand 
> Sent: Monday, October 26, 2020 12:24 AM
> To: Rong, Leyi ; Zhang, Qi Z ; 
> Yigit,
> Ferruh 
> Cc: Lu, Wenzhuo ; dev ; Richardson,
> Bruce ; Thomas Monjalon
> 
> Subject: Re: [dpdk-dev] [PATCH v4 1/3] net/ice: add AVX512 vector path
> 
> Hello Leyi, Qi, Ferruh,
> 
> On Fri, Oct 23, 2020 at 6:37 AM Leyi Rong  wrote:
> > diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build
> > index 254595af85..85e8baf912 100644
> > --- a/drivers/net/ice/meson.build
> > +++ b/drivers/net/ice/meson.build
> > @@ -34,6 +34,17 @@ if arch_subdir == 'x86'
> > c_args: [cflags, '-mavx2'])
> > objs += ice_avx2_lib.extract_objects('ice_rxtx_vec_avx2.c')
> > endif
> > +
> > +   if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512F') or (not
> machine_args.contains('-mno-avx512f') and cc.has_argument('-mavx512f'))
> > +   cflags += ['-DCC_AVX512_SUPPORT']
> > +   ice_avx512_lib = static_library('ice_avx512_lib',
> > + 'ice_rxtx_vec_avx512.c',
> > + dependencies: [static_rte_ethdev,
> > +   static_rte_kvargs, static_rte_hash],
> > + include_directories: includes,
> > + c_args: [cflags, 
> > '-march=skylake-avx512', '-mavx512f'])
> > +   objs += 
> > ice_avx512_lib.extract_objects('ice_rxtx_vec_avx512.c')
> > +   endif
> >  endif
> >
> >  sources += files('ice_dcf.c',
> > --
> > 2.17.1
> >
> 
> RTE_MACHINE_CPUFLAG_AVX512F can be removed.
> Worth fixing before hitting the main tree.
> 
> Thanks.
> 
> --
> David Marchand

Hello David,

Would prefer using __AVX512F__ instead of RTE_MACHINE_CPUFLAG_AVX512F here 
rather than remove the RTE_MACHINE_CPUFLAG_ macro directly to check the CPU 
capability.
So the judgment statement will be 
if cc.get_define('__AVX512F__', args: machine_args) != '' or (not 
machine_args.contains('-mno-avx512f') and cc.has_argument('-mavx512f'))

what do you think?


Hello Ferruh,

As the patchset is already merged into dpdk-next-net, I'm going to make another 
patch for this if it's accepted?


Re: [dpdk-dev] [PATCH] net/i40e: fix FDIR issue for ETH + VLAN pattern

2020-10-26 Thread Guo, Jia
Acked-by: Jeff Guo 

> -Original Message-
> From: Xing, Beilei 
> Sent: Tuesday, October 27, 2020 2:22 PM
> To: dev@dpdk.org
> Cc: Guo, Jia ; Xing, Beilei ;
> sta...@dpdk.org
> Subject: [PATCH] net/i40e: fix FDIR issue for ETH + VLAN pattern
> 
> From: Beilei Xing 
> 
> Currently, more than one of the following flows cannot be created for the ETH + VLAN pattern.
> 
> 1. flow create 0 ingress pattern eth / vlan vid is 350 / end
>actions queue index 2 / end
> 2. flow create 0 ingress pattern eth / vlan vid is 351 / end
>actions queue index 3 / end
> 
> The root cause is that the vlan_tci is not set correctly, which causes the
> keys
> of both flows to be the same.
> 
> Fixes: 42044b69c67d ("net/i40e: support input set selection for FDIR")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Beilei Xing 
> ---
>  drivers/net/i40e/i40e_flow.c | 35 ---
>  1 file changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_flow.c b/drivers/net/i40e/i40e_flow.c
> index 8e7a9989b3..5bec0c7a84 100644
> --- a/drivers/net/i40e/i40e_flow.c
> +++ b/drivers/net/i40e/i40e_flow.c
> @@ -27,7 +27,10 @@
>  #define I40E_IPV6_TC_MASK(0xFF << I40E_FDIR_IPv6_TC_OFFSET)
>  #define I40E_IPV6_FRAG_HEADER44
>  #define I40E_TENANT_ARRAY_NUM3
> -#define I40E_TCI_MASK0x
> +#define I40E_VLAN_TCI_MASK   0x
> +#define I40E_VLAN_PRI_MASK   0xE000
> +#define I40E_VLAN_CFI_MASK   0x1000
> +#define I40E_VLAN_VID_MASK   0x0FFF
> 
>  static int i40e_flow_validate(struct rte_eth_dev *dev,
> const struct rte_flow_attr *attr, @@ -2705,12
> +2708,22 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
> 
>   RTE_ASSERT(!(input_set &
> I40E_INSET_LAST_ETHER_TYPE));
>   if (vlan_spec && vlan_mask) {
> - if (vlan_mask->tci ==
> - rte_cpu_to_be_16(I40E_TCI_MASK)) {
> - input_set |=
> I40E_INSET_VLAN_INNER;
> - filter->input.flow_ext.vlan_tci =
> - vlan_spec->tci;
> + if (vlan_mask->tci !=
> + rte_cpu_to_be_16(I40E_VLAN_TCI_MASK)
> &&
> + vlan_mask->tci !=
> + rte_cpu_to_be_16(I40E_VLAN_PRI_MASK)
> &&
> + vlan_mask->tci !=
> + rte_cpu_to_be_16(I40E_VLAN_CFI_MASK)
> &&
> + vlan_mask->tci !=
> +
> rte_cpu_to_be_16(I40E_VLAN_VID_MASK)) {
> + rte_flow_error_set(error, EINVAL,
> +
> RTE_FLOW_ERROR_TYPE_ITEM,
> +item,
> +"Unsupported TCI mask.");
>   }
> + input_set |= I40E_INSET_VLAN_INNER;
> + filter->input.flow_ext.vlan_tci =
> + vlan_spec->tci;
>   }
>   if (vlan_spec && vlan_mask && vlan_mask-
> >inner_type) {
>   if (vlan_mask->inner_type !=
> RTE_BE16(0x)) { @@ -3894,10 +3907,10 @@
> i40e_flow_parse_vxlan_pattern(__rte_unused struct rte_eth_dev *dev,
> 
>   if (vlan_spec && vlan_mask) {
>   if (vlan_mask->tci ==
> - rte_cpu_to_be_16(I40E_TCI_MASK))
> + rte_cpu_to_be_16(I40E_VLAN_TCI_MASK))
>   filter->inner_vlan =
> rte_be_to_cpu_16(vlan_spec->tci)
> &
> -   I40E_TCI_MASK;
> +   I40E_VLAN_TCI_MASK;
>   filter_type |= ETH_TUNNEL_FILTER_IVLAN;
>   }
>   break;
> @@ -4125,10 +4138,10 @@ i40e_flow_parse_nvgre_pattern(__rte_unused
> struct rte_eth_dev *dev,
> 
>   if (vlan_spec && vlan_mask) {
>   if (vlan_mask->tci ==
> - rte_cpu_to_be_16(I40E_TCI_MASK))
> + rte_cpu_to_be_16(I40E_VLAN_TCI_MASK))
>   filter->inner_vlan =
> rte_be_to_cpu_16(vlan_spec->tci)
> &
> -   I40E_TCI_MASK;
> +   I40E_VLAN_TCI_MASK;
>   filter_type |= ETH_TUNNEL_FILTER_IVLAN;
>   }
>   break;
> @@ -4800,7 +4813,7 @@ i40e_flow_parse_rss_pattern(__rte_unused struct
> rte_eth_dev *dev,
>   vlan_mask = item->mask;
>   

Re: [dpdk-dev] [Patch v2] net/vdev_netvsc: prevent alarm loss on failed device probe

2020-10-26 Thread Matan Azrad
Hi Long

From: Long Li 
> If a device probe fails, the alarm is canceled and will no longer work for
> previously probed devices.
> 
> Fix this by checking if alarm is necessary at the end of each device probe.
> Reset the alarm if there are vdev_netvsc_ctx created.
> 
> Change log:
> v2: removed lock and flags, use counter to decide if alarm should be reset
> 
> Cc: sta...@dpdk.org
> Signed-off-by: Long Li 

I suggest the next title:
net/vdev_netvsc: fix device probing error flow

and the next fixes line:
Fixes: e7dc5d7becc5 ("net/vdev_netvsc: implement core functionality")

Acked-by: Matan Azrad 


[dpdk-dev] [PATCH 1/2] common/mlx5: add virtq attributes error fields

2020-10-26 Thread Xueming Li
Add the needed fields for virtq DevX object to read the error state.

Acked-by: Matan Azrad 
Signed-off-by: Xueming Li 
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
 drivers/common/mlx5/mlx5_prm.h   | 9 +++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c 
b/drivers/common/mlx5/mlx5_devx_cmds.c
index 8aee12d527..dc426e9b09 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1754,6 +1754,9 @@ mlx5_devx_cmd_query_virtq(struct mlx5_devx_obj *virtq_obj,
attr->hw_available_index = MLX5_GET16(virtio_net_q, virtq,
  hw_available_index);
attr->hw_used_index = MLX5_GET16(virtio_net_q, virtq, hw_used_index);
+   attr->state = MLX5_GET16(virtio_net_q, virtq, state);
+   attr->error_type = MLX5_GET16(virtio_net_q, virtq,
+ virtio_q_context.error_type);
return ret;
 }
 
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h 
b/drivers/common/mlx5/mlx5_devx_cmds.h
index abbea67784..0ea2427b75 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -298,6 +298,7 @@ struct mlx5_devx_virtq_attr {
uint32_t size;
uint64_t offset;
} umems[3];
+   uint8_t error_type;
 };
 
 
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index d342263c85..7d671a3996 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -2280,7 +2280,8 @@ struct mlx5_ifc_virtio_q_bits {
u8 used_addr[0x40];
u8 available_addr[0x40];
u8 virtio_q_mkey[0x20];
-   u8 reserved_at_160[0x20];
+   u8 reserved_at_160[0x18];
+   u8 error_type[0x8];
u8 umem_1_id[0x20];
u8 umem_1_size[0x20];
u8 umem_1_offset[0x40];
@@ -2308,7 +2309,7 @@ struct mlx5_ifc_virtio_net_q_bits {
u8 vhost_log_page[0x5];
u8 reserved_at_90[0xc];
u8 state[0x4];
-   u8 error_type[0x8];
+   u8 reserved_at_a0[0x8];
u8 tisn_or_qpn[0x18];
u8 dirty_bitmap_mkey[0x20];
u8 dirty_bitmap_size[0x20];
@@ -2329,6 +2330,10 @@ struct mlx5_ifc_query_virtq_out_bits {
struct mlx5_ifc_virtio_net_q_bits virtq;
 };
 
+enum {
+   MLX5_EVENT_TYPE_OBJECT_CHANGE = 0x27,
+};
+
 enum {
MLX5_QP_ST_RC = 0x0,
 };
-- 
2.25.1



[dpdk-dev] [PATCH 2/2] vdpa/mlx5: hardware error handling

2020-10-26 Thread Xueming Li
When a hardware error happens, the vdpa driver did not get this information
and was left silent: in a working state but giving no response.

This patch subscribes to the firmware virtq error event and tries to recover
at most 3 times in 10 seconds; the virtq is stopped if the maximum retry
number is reached.

When an error happens, the PMD logs at warning level. If recovery fails,
it outputs an error log. Query the virtq statistics to get the error counter report.

Acked-by: Matan Azrad 
Signed-off-by: Xueming Li 
---
 drivers/vdpa/mlx5/mlx5_vdpa.c   |   2 +
 drivers/vdpa/mlx5/mlx5_vdpa.h   |  37 
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 140 
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c |  61 +---
 4 files changed, 225 insertions(+), 15 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index a8f3e4b1de..ba779c10ee 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -283,6 +283,7 @@ mlx5_vdpa_dev_close(int vid)
}
if (priv->configured)
ret |= mlx5_vdpa_lm_log(priv);
+   mlx5_vdpa_err_event_unset(priv);
mlx5_vdpa_cqe_event_unset(priv);
mlx5_vdpa_steer_unset(priv);
mlx5_vdpa_virtqs_release(priv);
@@ -318,6 +319,7 @@ mlx5_vdpa_dev_config(int vid)
DRV_LOG(WARNING, "MTU cannot be set on device %s.",
vdev->device->name);
if (mlx5_vdpa_pd_create(priv) || mlx5_vdpa_mem_register(priv) ||
+   mlx5_vdpa_err_event_setup(priv) ||
mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
mlx5_vdpa_cqe_event_setup(priv)) {
mlx5_vdpa_dev_close(vid);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index fcbc12ab0c..0d6886c52c 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -87,6 +87,7 @@ struct mlx5_vdpa_virtq {
uint16_t vq_size;
uint8_t notifier_state;
bool stopped;
+   uint32_t version;
struct mlx5_vdpa_priv *priv;
struct mlx5_devx_obj *virtq;
struct mlx5_devx_obj *counters;
@@ -97,6 +98,8 @@ struct mlx5_vdpa_virtq {
uint32_t size;
} umems[3];
struct rte_intr_handle intr_handle;
+   uint64_t err_time[3]; /* RDTSC time of recent errors. */
+   uint32_t n_retry;
struct mlx5_devx_virtio_q_couners_attr reset;
 };
 
@@ -143,8 +146,10 @@ struct mlx5_vdpa_priv {
struct rte_vhost_memory *vmem;
uint32_t eqn;
struct mlx5dv_devx_event_channel *eventc;
+   struct mlx5dv_devx_event_channel *err_chnl;
struct mlx5dv_devx_uar *uar;
struct rte_intr_handle intr_handle;
+   struct rte_intr_handle err_intr_handle;
struct mlx5_devx_obj *td;
struct mlx5_devx_obj *tis;
uint16_t nr_virtqs;
@@ -259,6 +264,25 @@ int mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv);
  */
 void mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv);
 
+/**
+ * Setup error interrupt handler.
+ *
+ * @param[in] priv
+ *   The vdpa driver private structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int mlx5_vdpa_err_event_setup(struct mlx5_vdpa_priv *priv);
+
+/**
+ * Unset error event handler.
+ *
+ * @param[in] priv
+ *   The vdpa driver private structure.
+ */
+void mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv);
+
 /**
  * Release a virtq and all its related resources.
  *
@@ -392,6 +416,19 @@ int mlx5_vdpa_virtq_modify(struct mlx5_vdpa_virtq *virtq, 
int state);
  */
 int mlx5_vdpa_virtq_stop(struct mlx5_vdpa_priv *priv, int index);
 
+/**
+ * Query virtq information.
+ *
+ * @param[in] priv
+ *   The vdpa driver private structure.
+ * @param[in] index
+ *   The virtq index.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int mlx5_vdpa_virtq_query(struct mlx5_vdpa_priv *priv, int index);
+
 /**
  * Get virtq statistics.
  *
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c 
b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 8a01e42794..89df699dad 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -15,11 +15,14 @@
 #include 
 
 #include 
+#include 
 
 #include "mlx5_vdpa_utils.h"
 #include "mlx5_vdpa.h"
 
 
+#define MLX5_VDPA_ERROR_TIME_SEC 3u
+
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -378,6 +381,143 @@ mlx5_vdpa_interrupt_handler(void *cb_arg)
pthread_mutex_unlock(&priv->vq_config_lock);
 }
 
+static void
+mlx5_vdpa_err_interrupt_handler(void *cb_arg __rte_unused)
+{
+#ifdef HAVE_IBV_DEVX_EVENT
+   struct mlx5_vdpa_priv *priv = cb_arg;
+   union {
+   struct mlx5dv_devx_async_event_hdr event_resp;
+   uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
+   } out;
+   uint32_t vq_index, i, version;
+   struct mlx5_vdpa_virtq *virtq;
+   uint64_t sec;
+
+   pthread_mutex_lock(&priv->vq_config_lock);
+   

[dpdk-dev] [RFC] vhost: support raising device error

2020-10-26 Thread Xueming Li
According to the virtio spec, the device SHOULD set DEVICE_NEEDS_RESET when
it enters an error state that a reset is needed. If DRIVER_OK is set,
after it sets DEVICE_NEEDS_RESET, the device MUST send a device
configuration change notification to the driver.

This patch introduces new api to raise vDPA hardware error and escalates
configuration change to vhost via client message
VHOST_USER_SLAVE_CONFIG_CHANGE_MSG.

The vhost should check DRIVER_OK and decide whether to notify driver.

Signed-off-by: Xueming Li 
---
 lib/librte_vhost/rte_vdpa_dev.h | 12 
 lib/librte_vhost/version.map|  1 +
 lib/librte_vhost/vhost_user.c   | 14 ++
 3 files changed, 27 insertions(+)

diff --git a/lib/librte_vhost/rte_vdpa_dev.h b/lib/librte_vhost/rte_vdpa_dev.h
index a60183f780..87b7397c6f 100644
--- a/lib/librte_vhost/rte_vdpa_dev.h
+++ b/lib/librte_vhost/rte_vdpa_dev.h
@@ -117,6 +117,18 @@ rte_vdpa_unregister_device(struct rte_vdpa_device *dev);
 int
 rte_vhost_host_notifier_ctrl(int vid, uint16_t qid, bool enable);
 
+/**
+ * Set device hardware error and notify host.
+ *
+ * @param vid
+ *  vhost device id
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_host_raise_error(int vid);
+
 /**
  * Synchronize the used ring from mediated ring to guest, log dirty
  * page for each writeable buffer, caller should handle the used
diff --git a/lib/librte_vhost/version.map b/lib/librte_vhost/version.map
index 9183d6f2fc..5a4c5dc818 100644
--- a/lib/librte_vhost/version.map
+++ b/lib/librte_vhost/version.map
@@ -76,4 +76,5 @@ EXPERIMENTAL {
rte_vhost_async_channel_unregister;
rte_vhost_submit_enqueue_burst;
rte_vhost_poll_enqueue_completed;
+   rte_vhost_host_raise_error;
 };
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index d20c8c57ad..d8353176f2 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -2992,6 +2992,20 @@ rte_vhost_slave_config_change(int vid, bool need_reply)
return vhost_user_slave_config_change(dev, need_reply);
 }
 
+int
+rte_vhost_host_raise_error(int vid)
+{
+   struct virtio_net *dev;
+
+   dev = get_device(vid);
+   if (!dev)
+   return -ENODEV;
+
+   dev->status |= VIRTIO_DEVICE_STATUS_DEV_NEED_RESET;
+
+   return vhost_user_slave_config_change(dev, 0);
+}
+
 static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev,
int index, int fd,
uint64_t offset,
-- 
2.25.1



[dpdk-dev] [PATCH v4 00/15] bnxt fixes and enhancements

2020-10-26 Thread Ajit Khaparde
This patchset has support for VXLAN decap full offload,
SR device support in tf_core and a few bug fixes.

v1->v2:
 - Squashed some patches.
 - Updated commit logs.
 - Fixed a compilation issue reported during clang builds.

v2->v3:
 - Added the SPDX header for cfa_resource_types.h in patch [01/11].
 - Update commit logs.

v3->v4:
 - Addressed review comments.
 - Added verbose commit messages.

Ajit Khaparde (3):
  net/bnxt: increase the size of Rx CQ
  net/bnxt: fix to reset mbuf data offset
  net/bnxt: set thread safe flow ops flag

Farah Smith (1):
  net/bnxt: add table scope to PF Mapping

Jay Ding (1):
  net/bnxt: support two table scopes

Kishore Padmanabha (2):
  net/bnxt: add hierarchical flow counters
  net/bnxt: add mapper support for wildcard TCAM

Peter Spreadborough (2):
  net/bnxt: add stingray support to core layer
  net/bnxt: update ULP resource counts

Rahul Gupta (1):
  net/bnxt: fix Rx performance by removing spinlock

Somnath Kotur (2):
  net/bnxt: fix flow query count
  net/bnxt: modify HWRM command to create reps

Venkat Duvvuru (3):
  net/bnxt: refactor flow id allocation
  net/bnxt: add VXLAN decap templates
  net/bnxt: add VXLAN decap offload support

 doc/guides/nics/bnxt.rst  |   20 +
 doc/guides/rel_notes/release_20_11.rst|3 +
 drivers/net/bnxt/bnxt.h   |   10 +-
 drivers/net/bnxt/bnxt_cpr.c   |   12 +
 drivers/net/bnxt/bnxt_cpr.h   |1 +
 drivers/net/bnxt/bnxt_ethdev.c|8 +
 drivers/net/bnxt/bnxt_hwrm.c  |   60 +-
 drivers/net/bnxt/bnxt_hwrm.h  |2 -
 drivers/net/bnxt/bnxt_reps.c  |   18 +-
 drivers/net/bnxt/bnxt_ring.h  |2 +-
 drivers/net/bnxt/bnxt_rxq.c   |4 -
 drivers/net/bnxt/bnxt_rxq.h   |3 -
 drivers/net/bnxt/bnxt_rxr.c   |6 +-
 drivers/net/bnxt/bnxt_rxr.h   |2 -
 drivers/net/bnxt/bnxt_txr.h   |2 -
 drivers/net/bnxt/hcapi/hcapi_cfa.h|   39 +-
 drivers/net/bnxt/meson.build  |1 +
 drivers/net/bnxt/tf_core/cfa_resource_types.h |   82 +-
 drivers/net/bnxt/tf_core/tf_core.c|   57 +-
 drivers/net/bnxt/tf_core/tf_core.h|   57 +-
 drivers/net/bnxt/tf_core/tf_device.c  |9 +-
 drivers/net/bnxt/tf_core/tf_device.h  |   47 +-
 drivers/net/bnxt/tf_core/tf_device_p4.c   |   69 +-
 drivers/net/bnxt/tf_core/tf_device_p4.h   |6 +
 drivers/net/bnxt/tf_core/tf_device_p45.h  |  105 +
 drivers/net/bnxt/tf_core/tf_em.h  |   25 +-
 drivers/net/bnxt/tf_core/tf_em_common.c   |  176 +-
 drivers/net/bnxt/tf_core/tf_em_common.h   |   27 +-
 drivers/net/bnxt/tf_core/tf_em_host.c |   31 +-
 drivers/net/bnxt/tf_core/tf_global_cfg.c  |4 +-
 drivers/net/bnxt/tf_core/tf_global_cfg.h  |   42 +-
 drivers/net/bnxt/tf_core/tf_msg.c |   13 +-
 drivers/net/bnxt/tf_core/tf_msg.h |4 +-
 drivers/net/bnxt/tf_core/tf_session.h |   16 +-
 drivers/net/bnxt/tf_core/tf_tbl.h |3 +
 drivers/net/bnxt/tf_core/tfp.c|   19 +
 drivers/net/bnxt/tf_core/tfp.h|   16 +
 drivers/net/bnxt/tf_ulp/bnxt_tf_common.h  |4 +-
 drivers/net/bnxt/tf_ulp/bnxt_ulp.c|   50 +-
 drivers/net/bnxt/tf_ulp/bnxt_ulp.h|   12 +
 drivers/net/bnxt/tf_ulp/bnxt_ulp_flow.c   |  129 +-
 drivers/net/bnxt/tf_ulp/ulp_def_rules.c   |   48 +-
 drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c  |  133 +-
 drivers/net/bnxt/tf_ulp/ulp_fc_mgr.h  |   19 +
 drivers/net/bnxt/tf_ulp/ulp_flow_db.c |  475 +++-
 drivers/net/bnxt/tf_ulp/ulp_flow_db.h |   46 +
 drivers/net/bnxt/tf_ulp/ulp_mapper.c  |   83 +-
 drivers/net/bnxt/tf_ulp/ulp_mapper.h  |6 +-
 drivers/net/bnxt/tf_ulp/ulp_rte_parser.c  |   85 +-
 drivers/net/bnxt/tf_ulp/ulp_rte_parser.h  |   12 +
 .../net/bnxt/tf_ulp/ulp_template_db_class.c   |  962 ---
 .../net/bnxt/tf_ulp/ulp_template_db_enum.h|   42 +-
 .../net/bnxt/tf_ulp/ulp_template_db_field.h   |  363 ++-
 .../tf_ulp/ulp_template_db_stingray_act.c |  116 +-
 .../tf_ulp/ulp_template_db_stingray_class.c   | 2493 ++---
 drivers/net/bnxt/tf_ulp/ulp_template_db_tbl.c |9 +-
 .../bnxt/tf_ulp/ulp_template_db_wh_plus_act.c |  116 +-
 .../tf_ulp/ulp_template_db_wh_plus_class.c| 2489 +---
 drivers/net/bnxt/tf_ulp/ulp_template_struct.h |7 +
 drivers/net/bnxt/tf_ulp/ulp_tun.c |  310 ++
 drivers/net/bnxt/tf_ulp/ulp_tun.h |   92 +
 drivers/net/bnxt/tf_ulp/ulp_utils.c   |  125 +
 drivers/net/bnxt/tf_ulp/ulp_utils.h   |   48 +-
 63 files changed, 7225 insertions(+), 2050 deletions(-)
 create mode 100644 drivers/net/bnxt/tf_core/tf_device_p45.h
 create mode 100644 drivers/net/bnxt/tf_ulp/ulp_tun.c
 create mode 100644 drivers

[dpdk-dev] [PATCH v4 03/15] net/bnxt: add table scope to PF Mapping

2020-10-26 Thread Ajit Khaparde
From: Farah Smith 

Add table scope to PF Mapping for SR and Wh+ devices.
Legacy devices require PF set of base addresses for EEM operation.
A table scope id is a logical construct and is mapped to the PF
associated with the communications channel used.
In the case of a VF, the parent PF is used.

Signed-off-by: Farah Smith 
Reviewed-by: Randy Schacher 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/tf_core/tf_device.h| 10 +++---
 drivers/net/bnxt/tf_core/tf_device_p4.c | 10 +-
 drivers/net/bnxt/tf_core/tf_em_common.c |  6 --
 drivers/net/bnxt/tf_core/tf_em_host.c   |  8 
 drivers/net/bnxt/tf_core/tf_tbl.h   |  3 +++
 drivers/net/bnxt/tf_core/tfp.c  | 19 +++
 drivers/net/bnxt/tf_core/tfp.h  | 16 
 7 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/drivers/net/bnxt/tf_core/tf_device.h 
b/drivers/net/bnxt/tf_core/tf_device.h
index cf7c36e0ea..b5fc695ece 100644
--- a/drivers/net/bnxt/tf_core/tf_device.h
+++ b/drivers/net/bnxt/tf_core/tf_device.h
@@ -579,8 +579,11 @@ struct tf_dev_ops {
 * [in] tfp
 *   Pointer to TF handle
 *
-* [in] parms
-*   Pointer to table scope map parameters
+* [in] pf
+* PF associated with the table scope
+*
+* [in] parif_bitmask
+* Bitmask of PARIFs to enable
 *
 * [in/out] pointer to the parif_2_pf data to be updated
 *
@@ -593,7 +596,8 @@ struct tf_dev_ops {
 *-EINVAL - Error
 */
int (*tf_dev_map_parif)(struct tf *tfp,
-   struct tf_map_tbl_scope_parms *parms,
+   uint16_t parif_bitmask,
+   uint16_t pf,
uint8_t *data,
uint8_t *mask,
uint16_t sz_in_bytes);
diff --git a/drivers/net/bnxt/tf_core/tf_device_p4.c 
b/drivers/net/bnxt/tf_core/tf_device_p4.c
index 07c8d02faa..b35e65a8de 100644
--- a/drivers/net/bnxt/tf_core/tf_device_p4.c
+++ b/drivers/net/bnxt/tf_core/tf_device_p4.c
@@ -103,7 +103,8 @@ tf_dev_p4_get_tcam_slice_info(struct tf *tfp __rte_unused,
 
 static int
 tf_dev_p4_map_parif(struct tf *tfp __rte_unused,
-   struct tf_map_tbl_scope_parms *parms,
+   uint16_t parif_bitmask,
+   uint16_t pf,
uint8_t *data,
uint8_t *mask,
uint16_t sz_in_bytes)
@@ -112,21 +113,20 @@ tf_dev_p4_map_parif(struct tf *tfp __rte_unused,
uint32_t parif_pf_mask[2] = { 0 };
uint32_t parif;
uint32_t shift;
-   uint32_t scope_id = (uint32_t)(parms->tbl_scope_id);
 
if (sz_in_bytes != sizeof(uint64_t))
return -ENOTSUP;
 
for (parif = 0; parif < TF_DEV_P4_PARIF_MAX; parif++) {
-   if (parms->parif_bitmask & (1UL << parif)) {
+   if (parif_bitmask & (1UL << parif)) {
if (parif < 8) {
shift = 4 * parif;
parif_pf_mask[0] |= TF_DEV_P4_PF_MASK << shift;
-   parif_pf[0] |= scope_id << shift;
+   parif_pf[0] |= pf << shift;
} else {
shift = 4 * (parif - 8);
parif_pf_mask[1] |= TF_DEV_P4_PF_MASK << shift;
-   parif_pf[1] |= scope_id << shift;
+   parif_pf[1] |= pf << shift;
}
}
}
diff --git a/drivers/net/bnxt/tf_core/tf_em_common.c 
b/drivers/net/bnxt/tf_core/tf_em_common.c
index d4e8469edf..ad92cbdc75 100644
--- a/drivers/net/bnxt/tf_core/tf_em_common.c
+++ b/drivers/net/bnxt/tf_core/tf_em_common.c
@@ -1104,8 +1104,10 @@ int tf_em_ext_map_tbl_scope(struct tf *tfp,
}
mask = aparms.mem_va;
 
-   rc = dev->ops->tf_dev_map_parif(tfp, parms, (uint8_t *)data,
-   (uint8_t *)mask, sz_in_bytes);
+   rc = dev->ops->tf_dev_map_parif(tfp, parms->parif_bitmask,
+   tbl_scope_cb->pf,
+   (uint8_t *)data, (uint8_t *)mask,
+   sz_in_bytes);
 
if (rc) {
TFP_DRV_LOG(ERR,
diff --git a/drivers/net/bnxt/tf_core/tf_em_host.c 
b/drivers/net/bnxt/tf_core/tf_em_host.c
index b5db94f3ef..a106bdffde 100644
--- a/drivers/net/bnxt/tf_core/tf_em_host.c
+++ b/drivers/net/bnxt/tf_core/tf_em_host.c
@@ -392,6 +392,14 @@ tf_em_ext_alloc(struct tf *tfp, struct 
tf_alloc_tbl_scope_parms *parms)
tbl_scope_cb->index = parms->tbl_scope_id;
tbl_scope_cb->tbl_scope_id = parms->tbl_scope_id;
 
+   rc = tfp_get_pf(tfp, &tbl_scope_cb->pf);
+   if (rc) {
+   TFP_DRV_LOG(ERR,
+   "EEM: PF q

[dpdk-dev] [PATCH v4 02/15] net/bnxt: support two table scopes

2020-10-26 Thread Ajit Khaparde
From: Jay Ding 

Adding support for two table scopes. One for Exact Match tables
and the other for External Exact Match tables.
New API to map a PARIF to an EEM table scope (set of Rx and Tx EEM
base addresses). It uses HWRM_TF_GLOBAL_CFG_SET HWRM to configure.
PARIF is a handle to a partition of the physical port.
Adjustments to tf_global_cfg_set() to reduce overhead and nominal
name clarification.

Signed-off-by: Jay Ding 
Signed-off-by: Farah Smith 
Reviewed-by: Randy Schacher 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/tf_core/tf_core.c   |  54 +--
 drivers/net/bnxt/tf_core/tf_core.h   |  55 ++-
 drivers/net/bnxt/tf_core/tf_device.h |  43 +-
 drivers/net/bnxt/tf_core/tf_device_p4.c  |  44 ++
 drivers/net/bnxt/tf_core/tf_em.h |  19 ++-
 drivers/net/bnxt/tf_core/tf_em_common.c  | 174 ---
 drivers/net/bnxt/tf_core/tf_em_common.h  |  27 +---
 drivers/net/bnxt/tf_core/tf_em_host.c|  23 ++-
 drivers/net/bnxt/tf_core/tf_global_cfg.c |   4 +-
 drivers/net/bnxt/tf_core/tf_global_cfg.h |  42 ++
 drivers/net/bnxt/tf_core/tf_msg.c|  13 +-
 drivers/net/bnxt/tf_core/tf_msg.h|   4 +-
 12 files changed, 326 insertions(+), 176 deletions(-)

diff --git a/drivers/net/bnxt/tf_core/tf_core.c 
b/drivers/net/bnxt/tf_core/tf_core.c
index 788335b814..0f49a00256 100644
--- a/drivers/net/bnxt/tf_core/tf_core.c
+++ b/drivers/net/bnxt/tf_core/tf_core.c
@@ -303,7 +303,6 @@ int tf_get_global_cfg(struct tf *tfp,
int rc = 0;
struct tf_session *tfs;
struct tf_dev_info *dev;
-   struct tf_dev_global_cfg_parms gparms = { 0 };
 
TF_CHECK_PARMS2(tfp, parms);
 
@@ -342,12 +341,7 @@ int tf_get_global_cfg(struct tf *tfp,
return -EOPNOTSUPP;
}
 
-   gparms.dir = parms->dir;
-   gparms.type = parms->type;
-   gparms.offset = parms->offset;
-   gparms.config = parms->config;
-   gparms.config_sz_in_bytes = parms->config_sz_in_bytes;
-   rc = dev->ops->tf_dev_get_global_cfg(tfp, &gparms);
+   rc = dev->ops->tf_dev_get_global_cfg(tfp, parms);
if (rc) {
TFP_DRV_LOG(ERR,
"%s: Global Cfg get failed, rc:%s\n",
@@ -371,7 +365,6 @@ int tf_set_global_cfg(struct tf *tfp,
int rc = 0;
struct tf_session *tfs;
struct tf_dev_info *dev;
-   struct tf_dev_global_cfg_parms gparms = { 0 };
 
TF_CHECK_PARMS2(tfp, parms);
 
@@ -410,12 +403,7 @@ int tf_set_global_cfg(struct tf *tfp,
return -EOPNOTSUPP;
}
 
-   gparms.dir = parms->dir;
-   gparms.type = parms->type;
-   gparms.offset = parms->offset;
-   gparms.config = parms->config;
-   gparms.config_sz_in_bytes = parms->config_sz_in_bytes;
-   rc = dev->ops->tf_dev_set_global_cfg(tfp, &gparms);
+   rc = dev->ops->tf_dev_set_global_cfg(tfp, parms);
if (rc) {
TFP_DRV_LOG(ERR,
"%s: Global Cfg set failed, rc:%s\n",
@@ -1352,6 +1340,44 @@ tf_alloc_tbl_scope(struct tf *tfp,
 
return rc;
 }
+int
+tf_map_tbl_scope(struct tf *tfp,
+  struct tf_map_tbl_scope_parms *parms)
+{
+   struct tf_session *tfs;
+   struct tf_dev_info *dev;
+   int rc;
+
+   TF_CHECK_PARMS2(tfp, parms);
+
+   /* Retrieve the session information */
+   rc = tf_session_get_session(tfp, &tfs);
+   if (rc) {
+   TFP_DRV_LOG(ERR,
+   "Failed to lookup session, rc:%s\n",
+   strerror(-rc));
+   return rc;
+   }
+
+   /* Retrieve the device information */
+   rc = tf_session_get_device(tfs, &dev);
+   if (rc) {
+   TFP_DRV_LOG(ERR,
+   "Failed to lookup device, rc:%s\n",
+   strerror(-rc));
+   return rc;
+   }
+
+   if (dev->ops->tf_dev_map_tbl_scope != NULL) {
+   rc = dev->ops->tf_dev_map_tbl_scope(tfp, parms);
+   } else {
+   TFP_DRV_LOG(ERR,
+   "Map table scope not supported by device\n");
+   return -EINVAL;
+   }
+
+   return rc;
+}
 
 int
 tf_free_tbl_scope(struct tf *tfp,
diff --git a/drivers/net/bnxt/tf_core/tf_core.h 
b/drivers/net/bnxt/tf_core/tf_core.h
index 65be8f54a4..fa8ab52af1 100644
--- a/drivers/net/bnxt/tf_core/tf_core.h
+++ b/drivers/net/bnxt/tf_core/tf_core.h
@@ -898,7 +898,9 @@ struct tf_alloc_tbl_scope_parms {
 */
uint32_t tbl_scope_id;
 };
-
+/**
+ * tf_free_tbl_scope_parms definition
+ */
 struct tf_free_tbl_scope_parms {
/**
 * [in] table scope identifier
@@ -906,6 +908,21 @@ struct tf_free_tbl_scope_parms {
uint32_t tbl_scope_id;
 };
 
+/**
+ * tf_map_tbl_scope_parms definition
+ */
+struct tf_map_tbl_scope_parms {
+   /**
+* [in] table scope identifier
+*/
+   uint32_t tbl_scope_id;
+   /**
+* [in] Which parifs 

[dpdk-dev] [PATCH v4 01/15] net/bnxt: add stingray support to core layer

2020-10-26 Thread Ajit Khaparde
From: Peter Spreadborough 

- Moved P4 chip specific code under the P4 directory
- Added P45 skeleton code for SR to build on
- Add SR support in TRUFLOW core layer.
The TRUFLOW core or the tf-core is a shim layer which communicates with
the CFA block in the hardware.

Signed-off-by: Peter Spreadborough 
Signed-off-by: Jay Ding 
Reviewed-by: Farah Smith 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/hcapi/hcapi_cfa.h|  39 +--
 drivers/net/bnxt/tf_core/cfa_resource_types.h |  82 +-
 drivers/net/bnxt/tf_core/tf_core.c|   3 +-
 drivers/net/bnxt/tf_core/tf_core.h|   2 +-
 drivers/net/bnxt/tf_core/tf_device.c  |   9 +-
 drivers/net/bnxt/tf_core/tf_device_p4.c   |  25 -
 drivers/net/bnxt/tf_core/tf_device_p4.h   |   6 +
 drivers/net/bnxt/tf_core/tf_device_p45.h  | 105 ++
 drivers/net/bnxt/tf_core/tf_em.h  |   6 -
 9 files changed, 197 insertions(+), 80 deletions(-)
 create mode 100644 drivers/net/bnxt/tf_core/tf_device_p45.h

diff --git a/drivers/net/bnxt/hcapi/hcapi_cfa.h 
b/drivers/net/bnxt/hcapi/hcapi_cfa.h
index c7d87dec73..aa218d714d 100644
--- a/drivers/net/bnxt/hcapi/hcapi_cfa.h
+++ b/drivers/net/bnxt/hcapi/hcapi_cfa.h
@@ -14,7 +14,15 @@
 
 #include "hcapi_cfa_defs.h"
 
+#if CHIP_CFG == SR_A
+#define SUPPORT_CFA_HW_P45  1
+#undef SUPPORT_CFA_HW_P4
+#define SUPPORT_CFA_HW_P4   0
+#elif CHIP_CFG == CMB_A
 #define SUPPORT_CFA_HW_P4  1
+#else
+#error "Chip not supported"
+#endif
 
 #if SUPPORT_CFA_HW_P4 && SUPPORT_CFA_HW_P58 && SUPPORT_CFA_HW_P59
 #define SUPPORT_CFA_HW_ALL  1
@@ -81,17 +89,20 @@ struct hcapi_cfa_key_result {
 /* common CFA register access macros */
 #define CFA_REG(x) OFFSETOF(cfa_reg_t, cfa_##x)
 
-#ifndef REG_WR
-#define REG_WR(_p, x, y)  (*((uint32_t volatile *)(x)) = (y))
+#ifndef TF_REG_WR
+#define TF_REG_WR(_p, x, y)  (*((uint32_t volatile *)(x)) = (y))
 #endif
-#ifndef REG_RD
-#define REG_RD(_p, x)  (*((uint32_t volatile *)(x)))
+#ifndef TF_REG_RD
+#define TF_REG_RD(_p, x)  (*((uint32_t volatile *)(x)))
+#endif
+#ifndef TF_CFA_REG_RD
+#define TF_CFA_REG_RD(_p, x)   \
+   TF_REG_RD(0, (uint32_t)(_p)->base_addr + CFA_REG(x))
+#endif
+#ifndef TF_CFA_REG_WR
+#define TF_CFA_REG_WR(_p, x, y)\
+   TF_REG_WR(0, (uint32_t)(_p)->base_addr + CFA_REG(x), y)
 #endif
-#define CFA_REG_RD(_p, x)  \
-   REG_RD(0, (uint32_t)(_p)->base_addr + CFA_REG(x))
-#define CFA_REG_WR(_p, x, y)   \
-   REG_WR(0, (uint32_t)(_p)->base_addr + CFA_REG(x), y)
-
 
 /* Constants used by Resource Manager Registration*/
 #define RM_CLIENT_NAME_MAX_LEN  32
@@ -248,7 +259,15 @@ int hcapi_cfa_p4_mirror_hwop(struct hcapi_cfa_hwop *op,
 int hcapi_cfa_p4_global_cfg_hwop(struct hcapi_cfa_hwop *op,
 uint32_t type,
 struct hcapi_cfa_data *config);
-#endif /* SUPPORT_CFA_HW_P4 */
+/* SUPPORT_CFA_HW_P4 */
+#elif SUPPORT_CFA_HW_P45
+int hcapi_cfa_p45_mirror_hwop(struct hcapi_cfa_hwop *op,
+ struct hcapi_cfa_data *mirror);
+int hcapi_cfa_p45_global_cfg_hwop(struct hcapi_cfa_hwop *op,
+ uint32_t type,
+ struct hcapi_cfa_data *config);
+/* SUPPORT_CFA_HW_P45 */
+#endif
 /**
  *  HCAPI CFA device HW operation function callback definition
  *  This is standardized function callback hook to install different
diff --git a/drivers/net/bnxt/tf_core/cfa_resource_types.h 
b/drivers/net/bnxt/tf_core/cfa_resource_types.h
index 19838c393d..53b0187166 100644
--- a/drivers/net/bnxt/tf_core/cfa_resource_types.h
+++ b/drivers/net/bnxt/tf_core/cfa_resource_types.h
@@ -64,79 +64,47 @@
 #define CFA_RESOURCE_TYPE_P59_LAST  CFA_RESOURCE_TYPE_P59_VEB_TCAM
 
 
-/* Multicast Group */
-#define CFA_RESOURCE_TYPE_P58_MCG 0x0UL
-/* Encap 8 byte record */
-#define CFA_RESOURCE_TYPE_P58_ENCAP_8B0x1UL
-/* Encap 16 byte record */
-#define CFA_RESOURCE_TYPE_P58_ENCAP_16B   0x2UL
-/* Encap 64 byte record */
-#define CFA_RESOURCE_TYPE_P58_ENCAP_64B   0x3UL
-/* Source Property MAC */
-#define CFA_RESOURCE_TYPE_P58_SP_MAC  0x4UL
-/* Source Property MAC and IPv4 */
-#define CFA_RESOURCE_TYPE_P58_SP_MAC_IPV4 0x5UL
-/* Source Property MAC and IPv6 */
-#define CFA_RESOURCE_TYPE_P58_SP_MAC_IPV6 0x6UL
-/* Network Address Translation Port */
-#define CFA_RESOURCE_TYPE_P58_NAT_PORT0x7UL
-/* Network Address Translation IPv4 address */
-#define CFA_RESOURCE_TYPE_P58_NAT_IPV40x8UL
 /* Meter */
-#define CFA_RESOURCE_TYPE_P58_METER   0x9UL
-/* Flow State */
-#define CFA_RESOURCE_TYPE_P58_FLOW_STATE  0xaUL
-/* Full Action Records */
-#define CFA_RESOURCE_TYPE_P58_FULL_ACTION 0xbUL
-/* Action Record Format 0 */
-#define CFA_RESOURCE_TYPE_P58_FORMAT_0_ACTION 0xcUL
-/* Action Record Ext Format 0 */
-#define CFA_RESOURCE_TYPE_P58_EXT_FORMAT_0_ACTI

[dpdk-dev] [PATCH v4 04/15] net/bnxt: update ULP resource counts

2020-10-26 Thread Ajit Khaparde
From: Peter Spreadborough 

Update ULP resource counts for Stingray device.
- FW needs some resources for normal operation. Account those
in the resource manager.
- Update the SR ULP requested resource counts to reflect
those available after AFM resources are accounted for.
- Add build option to select either 2 or 4 slot EM entries.
The default is 4 slot entries.

Signed-off-by: Peter Spreadborough 
Signed-off-by: Farah Smith 
Reviewed-by: Randy Schacher 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/tf_core/tf_session.h | 16 +--
 drivers/net/bnxt/tf_ulp/bnxt_ulp.c| 40 +--
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/net/bnxt/tf_core/tf_session.h 
b/drivers/net/bnxt/tf_core/tf_session.h
index aa7a27877d..6a5c894033 100644
--- a/drivers/net/bnxt/tf_core/tf_session.h
+++ b/drivers/net/bnxt/tf_core/tf_session.h
@@ -33,16 +33,16 @@
 #define TF_SESSION_ID_INVALID 0x /** Invalid Session ID define */
 
 /**
- * Number of EM entries. Static for now will be removed
- * when parameter added at a later date. At this stage we
- * are using fixed size entries so that each stack entry
- * represents 4 RT (f/n)blocks. So we take the total block
- * allocation for truflow and divide that by 4.
+ * At this stage we are using fixed size entries so that each
+ * stack entry represents either 2 or 4 RT (f/n)blocks. So we
+ * take the total block allocation for truflow and divide that
+ * by either 2 or 4.
  */
-#define TF_SESSION_TOTAL_FN_BLOCKS (1024 * 8) /* 8K blocks */
+#ifdef TF_EM_ENTRY_IPV4_ONLY
+#define TF_SESSION_EM_ENTRY_SIZE 2 /* 2 blocks per entry */
+#else
 #define TF_SESSION_EM_ENTRY_SIZE 4 /* 4 blocks per entry */
-#define TF_SESSION_EM_POOL_SIZE \
-   (TF_SESSION_TOTAL_FN_BLOCKS / TF_SESSION_EM_ENTRY_SIZE)
+#endif
 
 /**
  * Session
diff --git a/drivers/net/bnxt/tf_ulp/bnxt_ulp.c 
b/drivers/net/bnxt/tf_ulp/bnxt_ulp.c
index b22929a634..d753b5af9f 100644
--- a/drivers/net/bnxt/tf_ulp/bnxt_ulp.c
+++ b/drivers/net/bnxt/tf_ulp/bnxt_ulp.c
@@ -153,11 +153,11 @@ bnxt_ulp_tf_session_resources_get(struct bnxt *bp,
case BNXT_ULP_DEVICE_ID_STINGRAY:
/** RX **/
/* Identifiers */
-   res->ident_cnt[TF_DIR_RX].cnt[TF_IDENT_TYPE_L2_CTXT_HIGH] = 100;
+   res->ident_cnt[TF_DIR_RX].cnt[TF_IDENT_TYPE_L2_CTXT_HIGH] = 315;
res->ident_cnt[TF_DIR_RX].cnt[TF_IDENT_TYPE_L2_CTXT_LOW] = 6;
-   res->ident_cnt[TF_DIR_RX].cnt[TF_IDENT_TYPE_WC_PROF] = 10;
-   res->ident_cnt[TF_DIR_RX].cnt[TF_IDENT_TYPE_PROF_FUNC] = 10;
-   res->ident_cnt[TF_DIR_RX].cnt[TF_IDENT_TYPE_EM_PROF] = 10;
+   res->ident_cnt[TF_DIR_RX].cnt[TF_IDENT_TYPE_WC_PROF] = 192;
+   res->ident_cnt[TF_DIR_RX].cnt[TF_IDENT_TYPE_PROF_FUNC] = 64;
+   res->ident_cnt[TF_DIR_RX].cnt[TF_IDENT_TYPE_EM_PROF] = 192;
 
/* Table Types */
res->tbl_cnt[TF_DIR_RX].cnt[TF_TBL_TYPE_FULL_ACT_RECORD] = 8192;
@@ -170,28 +170,28 @@ bnxt_ulp_tf_session_resources_get(struct bnxt *bp,
 
/* TCAMs */

res->tcam_cnt[TF_DIR_RX].cnt[TF_TCAM_TBL_TYPE_L2_CTXT_TCAM_HIGH] =
-   100;
+   315;
res->tcam_cnt[TF_DIR_RX].cnt[TF_TCAM_TBL_TYPE_L2_CTXT_TCAM_LOW] 
=
6;
-   res->tcam_cnt[TF_DIR_RX].cnt[TF_TCAM_TBL_TYPE_PROF_TCAM] = 100;
-   res->tcam_cnt[TF_DIR_RX].cnt[TF_TCAM_TBL_TYPE_WC_TCAM] = 0;
+   res->tcam_cnt[TF_DIR_RX].cnt[TF_TCAM_TBL_TYPE_PROF_TCAM] = 960;
+   res->tcam_cnt[TF_DIR_RX].cnt[TF_TCAM_TBL_TYPE_WC_TCAM] = 112;
 
/* EM */
-   res->em_cnt[TF_DIR_RX].cnt[TF_EM_TBL_TYPE_EM_RECORD] = 13168;
+   res->em_cnt[TF_DIR_RX].cnt[TF_EM_TBL_TYPE_EM_RECORD] = 13200;
 
/* EEM */
res->em_cnt[TF_DIR_RX].cnt[TF_EM_TBL_TYPE_TBL_SCOPE] = 1;
 
/* SP */
-   res->tbl_cnt[TF_DIR_RX].cnt[TF_TBL_TYPE_ACT_SP_SMAC] = 255;
+   res->tbl_cnt[TF_DIR_RX].cnt[TF_TBL_TYPE_ACT_SP_SMAC] = 256;
 
/** TX **/
/* Identifiers */
-   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_L2_CTXT_HIGH] = 100;
-   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_L2_CTXT_LOW] = 100;
-   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_WC_PROF] = 10;
-   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_PROF_FUNC] = 10;
-   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_EM_PROF] = 10;
+   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_L2_CTXT_HIGH] = 292;
+   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_L2_CTXT_LOW] = 127;
+   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_WC_PROF] = 192;
+   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_PROF_FUNC] = 64;
+   res->ident_cnt[TF_DIR_TX].cnt[TF_IDENT_TYPE_EM_PR

[dpdk-dev] [PATCH v4 05/15] net/bnxt: fix flow query count

2020-10-26 Thread Ajit Khaparde
From: Somnath Kotur 

Fix infinite loop in flow query count.
`nxt_resource_index` could be zero in some cases which is invalid and
should be part of the while loop condition. Also synchronize access to
the flow db using the fdb_lock

Fixes: 306c2d28e247 ("net/bnxt: support count action in flow query")

Signed-off-by: Somnath Kotur 
Reviewed-by: Venkat Duvvuru 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c | 51 +++-
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c 
b/drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c
index 051ebac049..41736a80df 100644
--- a/drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c
+++ b/drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c
@@ -559,6 +559,9 @@ int ulp_fc_mgr_query_count_get(struct bnxt_ulp_context 
*ctxt,
if (!ulp_fc_info)
return -ENODEV;
 
+   if (bnxt_ulp_cntxt_acquire_fdb_lock(ctxt))
+   return -EIO;
+
do {
rc = ulp_flow_db_resource_get(ctxt,
  BNXT_ULP_FDB_TYPE_REGULAR,
@@ -575,35 +578,35 @@ int ulp_fc_mgr_query_count_get(struct bnxt_ulp_context 
*ctxt,
break;
}
 
-   } while (!rc);
+   } while (!rc && nxt_resource_index);
+
+   bnxt_ulp_cntxt_release_fdb_lock(ctxt);
 
-   if (rc)
+   if (rc || !found_cntr_resource)
return rc;
 
-   if (found_cntr_resource) {
-   dir = params.direction;
-   hw_cntr_id = params.resource_hndl;
-   sw_cntr_idx = hw_cntr_id -
-   ulp_fc_info->shadow_hw_tbl[dir].start_idx;
-   sw_acc_tbl_entry = &ulp_fc_info->sw_acc_tbl[dir][sw_cntr_idx];
-   if (params.resource_sub_type ==
+   dir = params.direction;
+   hw_cntr_id = params.resource_hndl;
+   sw_cntr_idx = hw_cntr_id -
+   ulp_fc_info->shadow_hw_tbl[dir].start_idx;
+   sw_acc_tbl_entry = &ulp_fc_info->sw_acc_tbl[dir][sw_cntr_idx];
+   if (params.resource_sub_type ==
BNXT_ULP_RESOURCE_SUB_TYPE_INDEX_TYPE_INT_COUNT) {
-   pthread_mutex_lock(&ulp_fc_info->fc_lock);
-   if (sw_acc_tbl_entry->pkt_count) {
-   count->hits_set = 1;
-   count->bytes_set = 1;
-   count->hits = sw_acc_tbl_entry->pkt_count;
-   count->bytes = sw_acc_tbl_entry->byte_count;
-   }
-   if (count->reset) {
-   sw_acc_tbl_entry->pkt_count = 0;
-   sw_acc_tbl_entry->byte_count = 0;
-   }
-   pthread_mutex_unlock(&ulp_fc_info->fc_lock);
-   } else {
-   /* TBD: Handle External counters */
-   rc = -EINVAL;
+   pthread_mutex_lock(&ulp_fc_info->fc_lock);
+   if (sw_acc_tbl_entry->pkt_count) {
+   count->hits_set = 1;
+   count->bytes_set = 1;
+   count->hits = sw_acc_tbl_entry->pkt_count;
+   count->bytes = sw_acc_tbl_entry->byte_count;
}
+   if (count->reset) {
+   sw_acc_tbl_entry->pkt_count = 0;
+   sw_acc_tbl_entry->byte_count = 0;
+   }
+   pthread_mutex_unlock(&ulp_fc_info->fc_lock);
+   } else {
+   /* TBD: Handle External counters */
+   rc = -EINVAL;
}
 
return rc;
-- 
2.21.1 (Apple Git-122.3)



[dpdk-dev] [PATCH v4 07/15] net/bnxt: modify HWRM command to create reps

2020-10-26 Thread Ajit Khaparde
From: Somnath Kotur 

Use cfa pair alloc for configuring reps.
Instead of cfa_vfr_alloc for Wh+ and cfa_pair_alloc for Stingray,
converge to cfa_pair_alloc/free for both devices. Set the command
request structure bits accordingly.
As part of this, remove the old cfa_vfr_alloc cmd definitions as FW
has deprecated support for those commands.

Signed-off-by: Somnath Kotur 
Reviewed-by: Shahaji Bhosle 
Reviewed-by: Ajit Khaparde 
---
 doc/guides/rel_notes/release_20_11.rst |  1 +
 drivers/net/bnxt/bnxt.h|  6 ++-
 drivers/net/bnxt/bnxt_ethdev.c |  2 +
 drivers/net/bnxt/bnxt_hwrm.c   | 60 +++---
 drivers/net/bnxt/bnxt_hwrm.h   |  2 -
 drivers/net/bnxt/bnxt_reps.c   | 18 
 6 files changed, 22 insertions(+), 67 deletions(-)

diff --git a/doc/guides/rel_notes/release_20_11.rst 
b/doc/guides/rel_notes/release_20_11.rst
index f9ef4fe77b..edbcaf170b 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -147,6 +147,7 @@ New Features
   * Added support for RSS hash level selection.
   * Updated HWRM structures to 1.10.1.70 version.
   * Added TRUFLOW support for Stingray devices.
+  * Added support for representors on MAIA cores of SR.
 
 * **Updated Cisco enic driver.**
 
diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index a951bca7aa..57178192d2 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -836,12 +836,14 @@ struct bnxt_representor {
 #define BNXT_REP_Q_F2R_VALID   BIT(2)
 #define BNXT_REP_FC_R2F_VALID  BIT(3)
 #define BNXT_REP_FC_F2R_VALID  BIT(4)
+#define BNXT_REP_BASED_PF_VALIDBIT(5)
uint32_tflags;
uint16_tfw_fid;
 #defineBNXT_DFLT_VNIC_ID_INVALID   0x
uint16_tdflt_vnic_id;
uint16_tsvif;
uint16_tvfr_tx_cfa_action;
+   uint8_t parent_pf_idx; /* Logical PF index */
uint32_tdpdk_port_id;
uint32_trep_based_pf;
uint8_t rep_q_r2f;
@@ -863,7 +865,9 @@ struct bnxt_representor {
uint64_trx_drop_bytes[BNXT_MAX_VF_REP_RINGS];
 };
 
-#define BNXT_REP_PF(vfr_bp)((vfr_bp)->flags & BNXT_REP_IS_PF)
+#define BNXT_REP_PF(vfr_bp)((vfr_bp)->flags & BNXT_REP_IS_PF)
+#define BNXT_REP_BASED_PF(vfr_bp)  \
+   ((vfr_bp)->flags & BNXT_REP_BASED_PF_VALID)
 
 struct bnxt_vf_rep_tx_queue {
struct bnxt_tx_queue *txq;
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 5718cc877d..a0e01d059d 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -5765,6 +5765,8 @@ bnxt_parse_devarg_rep_based_pf(__rte_unused const char 
*key,
}
 
vfr_bp->rep_based_pf = rep_based_pf;
+   vfr_bp->flags |= BNXT_REP_BASED_PF_VALID;
+
PMD_DRV_LOG(INFO, "rep-based-pf = %d\n", vfr_bp->rep_based_pf);
 
return 0;
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 361f99536c..84702125cc 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -5671,55 +5671,6 @@ int bnxt_hwrm_cfa_counter_qstats(struct bnxt *bp,
return 0;
 }
 
-int bnxt_hwrm_cfa_vfr_alloc(struct bnxt *bp, uint16_t vf_idx)
-{
-   struct hwrm_cfa_vfr_alloc_output *resp = bp->hwrm_cmd_resp_addr;
-   struct hwrm_cfa_vfr_alloc_input req = {0};
-   int rc;
-
-   if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
-   PMD_DRV_LOG(DEBUG,
-   "Not a PF or trusted VF. Command not supported\n");
-   return 0;
-   }
-
-   HWRM_PREP(&req, HWRM_CFA_VFR_ALLOC, BNXT_USE_CHIMP_MB);
-   req.vf_id = rte_cpu_to_le_16(vf_idx);
-   snprintf(req.vfr_name, sizeof(req.vfr_name), "%svfr%d",
-bp->eth_dev->data->name, vf_idx);
-
-   rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
-   HWRM_CHECK_RESULT();
-
-   HWRM_UNLOCK();
-   PMD_DRV_LOG(DEBUG, "VFR %d allocated\n", vf_idx);
-   return rc;
-}
-
-int bnxt_hwrm_cfa_vfr_free(struct bnxt *bp, uint16_t vf_idx)
-{
-   struct hwrm_cfa_vfr_free_output *resp = bp->hwrm_cmd_resp_addr;
-   struct hwrm_cfa_vfr_free_input req = {0};
-   int rc;
-
-   if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
-   PMD_DRV_LOG(DEBUG,
-   "Not a PF or trusted VF. Command not supported\n");
-   return 0;
-   }
-
-   HWRM_PREP(&req, HWRM_CFA_VFR_FREE, BNXT_USE_CHIMP_MB);
-   req.vf_id = rte_cpu_to_le_16(vf_idx);
-   snprintf(req.vfr_name, sizeof(req.vfr_name), "%svfr%d",
-bp->eth_dev->data->name, vf_idx);
-
-   rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
-   HWRM_CHECK_RESULT();
-   HWRM_UNLOCK();
-   PMD

[dpdk-dev] [PATCH v4 08/15] net/bnxt: add mapper support for wildcard TCAM

2020-10-26 Thread Ajit Khaparde
From: Kishore Padmanabha 

Added support for the key and mask fields encoding for the
wildcard TCAM entry. Also add internal function to post process
the key/mask blobs for wildcard TCAM table. The size of the
wildcard TCAM slice is 80 bytes.

Signed-off-by: Kishore Padmanabha 
Reviewed-by: Mike Baucom 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/tf_ulp/ulp_mapper.c |  47 --
 drivers/net/bnxt/tf_ulp/ulp_utils.c  | 125 +++
 drivers/net/bnxt/tf_ulp/ulp_utils.h  |  48 +-
 3 files changed, 212 insertions(+), 8 deletions(-)

diff --git a/drivers/net/bnxt/tf_ulp/ulp_mapper.c 
b/drivers/net/bnxt/tf_ulp/ulp_mapper.c
index b74cb92f57..27b4780990 100644
--- a/drivers/net/bnxt/tf_ulp/ulp_mapper.c
+++ b/drivers/net/bnxt/tf_ulp/ulp_mapper.c
@@ -1497,6 +1497,29 @@ ulp_mapper_tcam_tbl_entry_write(struct 
bnxt_ulp_mapper_parms *parms,
return rc;
 }
 
+#define BNXT_ULP_WC_TCAM_SLICE_SIZE 80
+/* internal function to post process the key/mask blobs for wildcard tcam tbl 
*/
+static void ulp_mapper_wc_tcam_tbl_post_process(struct ulp_blob *blob,
+   uint32_t len)
+{
+   uint8_t mode[2] = {0x0, 0x0};
+   uint32_t mode_len = len / BNXT_ULP_WC_TCAM_SLICE_SIZE;
+   uint32_t size, idx;
+
+   /* Add the mode bits to the key and mask*/
+   if (mode_len == 2)
+   mode[1] = 2;
+   else if (mode_len > 2)
+   mode[1] = 3;
+
+   size = BNXT_ULP_WC_TCAM_SLICE_SIZE + ULP_BYTE_2_BITS(sizeof(mode));
+   for (idx = 0; idx < mode_len; idx++)
+   ulp_blob_insert(blob, (size * idx), mode,
+   ULP_BYTE_2_BITS(sizeof(mode)));
+   ulp_blob_perform_64B_word_swap(blob);
+   ulp_blob_perform_64B_byte_swap(blob);
+}
+
 static int32_t
 ulp_mapper_tcam_tbl_process(struct bnxt_ulp_mapper_parms *parms,
struct bnxt_ulp_mapper_tbl_info *tbl)
@@ -1533,9 +1556,9 @@ ulp_mapper_tcam_tbl_process(struct bnxt_ulp_mapper_parms 
*parms,
return -EINVAL;
}
 
-   if (!ulp_blob_init(&key, tbl->key_bit_size,
+   if (!ulp_blob_init(&key, tbl->blob_key_bit_size,
   parms->device_params->byte_order) ||
-   !ulp_blob_init(&mask, tbl->key_bit_size,
+   !ulp_blob_init(&mask, tbl->blob_key_bit_size,
   parms->device_params->byte_order) ||
!ulp_blob_init(&data, tbl->result_bit_size,
   parms->device_params->byte_order) ||
@@ -1545,6 +1568,11 @@ ulp_mapper_tcam_tbl_process(struct bnxt_ulp_mapper_parms 
*parms,
return -EINVAL;
}
 
+   if (tbl->resource_type == TF_TCAM_TBL_TYPE_WC_TCAM) {
+   key.byte_order = BNXT_ULP_BYTE_ORDER_BE;
+   mask.byte_order = BNXT_ULP_BYTE_ORDER_BE;
+   }
+
/* create the key/mask */
/*
 * NOTE: The WC table will require some kind of flag to handle the
@@ -1570,6 +1598,11 @@ ulp_mapper_tcam_tbl_process(struct bnxt_ulp_mapper_parms 
*parms,
}
}
 
+   if (tbl->resource_type == TF_TCAM_TBL_TYPE_WC_TCAM) {
+   ulp_mapper_wc_tcam_tbl_post_process(&key, tbl->key_bit_size);
+   ulp_mapper_wc_tcam_tbl_post_process(&mask, tbl->key_bit_size);
+   }
+
if (tbl->srch_b4_alloc == BNXT_ULP_SEARCH_BEFORE_ALLOC_NO) {
/*
 * No search for re-use is requested, so simply allocate the
@@ -1578,18 +1611,18 @@ ulp_mapper_tcam_tbl_process(struct 
bnxt_ulp_mapper_parms *parms,
aparms.dir  = tbl->direction;
aparms.tcam_tbl_type= tbl->resource_type;
aparms.search_enable= tbl->srch_b4_alloc;
-   aparms.key_sz_in_bits   = tbl->key_bit_size;
aparms.key  = ulp_blob_data_get(&key, &tmplen);
-   if (tbl->key_bit_size != tmplen) {
+   aparms.key_sz_in_bits   = tmplen;
+   if (tbl->blob_key_bit_size != tmplen) {
BNXT_TF_DBG(ERR, "Key len (%d) != Expected (%d)\n",
-   tmplen, tbl->key_bit_size);
+   tmplen, tbl->blob_key_bit_size);
return -EINVAL;
}
 
aparms.mask = ulp_blob_data_get(&mask, &tmplen);
-   if (tbl->key_bit_size != tmplen) {
+   if (tbl->blob_key_bit_size != tmplen) {
BNXT_TF_DBG(ERR, "Mask len (%d) != Expected (%d)\n",
-   tmplen, tbl->key_bit_size);
+   tmplen, tbl->blob_key_bit_size);
return -EINVAL;
}
 
diff --git a/drivers/net/bnxt/tf_ulp/ulp_utils.c 
b/drivers/net/bnxt/tf_ulp/ulp_utils.c
index 24474e2e27..a13a3bbf65 100644
--- a/drivers/net/bnxt/tf_ulp/ulp_utils.c
+++ b/drivers/net/bnxt/tf_ulp/ulp_util

[dpdk-dev] [PATCH v4 06/15] net/bnxt: add hierarchical flow counters

2020-10-26 Thread Ajit Khaparde
From: Kishore Padmanabha 

Add support for hierarchical flow counter accumulation.
In case of hierarchical flows, involving parent and child flows,
the child flow counters are aggregated to get the parent flow counter
information. This should help in cases where one or more flows
are related to a previously offloaded flow.

Signed-off-by: Kishore Padmanabha 
Reviewed-by: Shahaji Bhosle 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c  |  92 -
 drivers/net/bnxt/tf_ulp/ulp_fc_mgr.h  |  19 +
 drivers/net/bnxt/tf_ulp/ulp_flow_db.c | 382 ++
 drivers/net/bnxt/tf_ulp/ulp_flow_db.h |  44 ++
 .../net/bnxt/tf_ulp/ulp_template_db_enum.h|   3 +-
 5 files changed, 447 insertions(+), 93 deletions(-)

diff --git a/drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c 
b/drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c
index 41736a80df..734b419986 100644
--- a/drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c
+++ b/drivers/net/bnxt/tf_ulp/ulp_fc_mgr.c
@@ -21,13 +21,13 @@ static int
 ulp_fc_mgr_shadow_mem_alloc(struct hw_fc_mem_info *parms, int size)
 {
/* Allocate memory*/
-   if (parms == NULL)
+   if (!parms)
return -EINVAL;
 
parms->mem_va = rte_zmalloc("ulp_fc_info",
RTE_CACHE_LINE_ROUNDUP(size),
4096);
-   if (parms->mem_va == NULL) {
+   if (!parms->mem_va) {
BNXT_TF_DBG(ERR, "Allocate failed mem_va\n");
return -ENOMEM;
}
@@ -149,7 +149,6 @@ ulp_fc_mgr_deinit(struct bnxt_ulp_context *ctxt)
for (i = 0; i < TF_DIR_MAX; i++)
ulp_fc_mgr_shadow_mem_free(&ulp_fc_info->shadow_hw_tbl[i]);
 
-
rte_free(ulp_fc_info);
 
/* Safe to ignore on deinit */
@@ -254,7 +253,7 @@ ulp_bulk_get_flow_stats(struct tf *tfp,
stats = (uint64_t *)fc_info->shadow_hw_tbl[dir].mem_va;
parms.physical_mem_addr = (uintptr_t)fc_info->shadow_hw_tbl[dir].mem_pa;
 
-   if (stats == NULL) {
+   if (!stats) {
PMD_DRV_LOG(ERR,
"BULK: Memory not initialized id:0x%x dir:%d\n",
parms.starting_idx, dir);
@@ -274,7 +273,8 @@ ulp_bulk_get_flow_stats(struct tf *tfp,
sw_acc_tbl_entry = &fc_info->sw_acc_tbl[dir][i];
if (!sw_acc_tbl_entry->valid)
continue;
-   sw_acc_tbl_entry->pkt_count += FLOW_CNTR_PKTS(stats[i], dparms);
+   sw_acc_tbl_entry->pkt_count += FLOW_CNTR_PKTS(stats[i],
+ dparms);
sw_acc_tbl_entry->byte_count += FLOW_CNTR_BYTES(stats[i],
dparms);
}
@@ -282,7 +282,8 @@ ulp_bulk_get_flow_stats(struct tf *tfp,
return rc;
 }
 
-static int ulp_get_single_flow_stat(struct tf *tfp,
+static int ulp_get_single_flow_stat(struct bnxt_ulp_context *ctxt,
+   struct tf *tfp,
struct bnxt_ulp_fc_info *fc_info,
enum tf_dir dir,
uint32_t hw_cntr_id,
@@ -291,7 +292,7 @@ static int ulp_get_single_flow_stat(struct tf *tfp,
int rc = 0;
struct tf_get_tbl_entry_parms parms = { 0 };
enum tf_tbl_type stype = TF_TBL_TYPE_ACT_STATS_64;  /* TBD:Template? */
-   struct sw_acc_counter *sw_acc_tbl_entry = NULL;
+   struct sw_acc_counter *sw_acc_tbl_entry = NULL, *t_sw;
uint64_t stats = 0;
uint32_t sw_cntr_indx = 0;
 
@@ -318,6 +319,18 @@ static int ulp_get_single_flow_stat(struct tf *tfp,
sw_acc_tbl_entry->pkt_count = FLOW_CNTR_PKTS(stats, dparms);
sw_acc_tbl_entry->byte_count = FLOW_CNTR_BYTES(stats, dparms);
 
+   /* Update the parent counters if it is child flow */
+   if (sw_acc_tbl_entry->parent_flow_id) {
+   /* Update the parent counters */
+   t_sw = sw_acc_tbl_entry;
+   if (ulp_flow_db_parent_flow_count_update(ctxt,
+t_sw->parent_flow_id,
+t_sw->pkt_count,
+t_sw->byte_count)) {
+   PMD_DRV_LOG(ERR, "Error updating parent counters\n");
+   }
+   }
+
return rc;
 }
 
@@ -384,13 +397,17 @@ ulp_fc_mgr_alarm_cb(void *arg)
break;
}
*/
+
+   /* reset the parent accumulation counters before accumulation if any */
+   ulp_flow_db_parent_flow_count_reset(ctxt);
+
num_entries = dparms->flow_count_db_entries / 2;
for (i = 0; i < TF_DIR_MAX; i++) {
for (j = 0; j < num_entries; j++) {
if (!ulp_fc_info->sw_acc_tbl[i][j].valid)
continue;
 

[dpdk-dev] [PATCH v4 09/15] net/bnxt: refactor flow id allocation

2020-10-26 Thread Ajit Khaparde
From: Venkat Duvvuru 

Currently, the flow id is allocated inside ulp_mapper_flow_create.
However with vxlan decap feature if F2 flow comes before F1 flow
then F2 is cached and not really installed in the hardware which
means the code will return without calling ulp_mapper_flow_create.
But, ULP has to still return valid flow id to the stack.
Hence, move the flow id allocation outside ulp_mapper_flow_create.

Signed-off-by: Venkat Duvvuru 
Reviewed-by: Somnath Kotur 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/tf_ulp/bnxt_ulp_flow.c  | 109 ---
 drivers/net/bnxt/tf_ulp/ulp_def_rules.c  |  48 --
 drivers/net/bnxt/tf_ulp/ulp_mapper.c |  35 +---
 drivers/net/bnxt/tf_ulp/ulp_mapper.h |   4 +-
 drivers/net/bnxt/tf_ulp/ulp_rte_parser.h |   9 ++
 5 files changed, 132 insertions(+), 73 deletions(-)

diff --git a/drivers/net/bnxt/tf_ulp/bnxt_ulp_flow.c 
b/drivers/net/bnxt/tf_ulp/bnxt_ulp_flow.c
index c7b29824e4..47fbaba03c 100644
--- a/drivers/net/bnxt/tf_ulp/bnxt_ulp_flow.c
+++ b/drivers/net/bnxt/tf_ulp/bnxt_ulp_flow.c
@@ -74,6 +74,29 @@ bnxt_ulp_set_dir_attributes(struct ulp_rte_parser_params 
*params,
params->dir_attr |= BNXT_ULP_FLOW_ATTR_TRANSFER;
 }
 
+void
+bnxt_ulp_init_mapper_params(struct bnxt_ulp_mapper_create_parms *mapper_cparms,
+   struct ulp_rte_parser_params *params,
+   uint32_t priority, uint32_t class_id,
+   uint32_t act_tmpl, uint16_t func_id,
+   uint32_t fid,
+   enum bnxt_ulp_fdb_type flow_type)
+{
+   mapper_cparms->app_priority = priority;
+   mapper_cparms->dir_attr = params->dir_attr;
+
+   mapper_cparms->class_tid = class_id;
+   mapper_cparms->act_tid = act_tmpl;
+   mapper_cparms->func_id = func_id;
+   mapper_cparms->hdr_bitmap = ¶ms->hdr_bitmap;
+   mapper_cparms->hdr_field = params->hdr_field;
+   mapper_cparms->comp_fld = params->comp_fld;
+   mapper_cparms->act = ¶ms->act_bitmap;
+   mapper_cparms->act_prop = ¶ms->act_prop;
+   mapper_cparms->flow_type = flow_type;
+   mapper_cparms->flow_id = fid;
+}
+
 /* Function to create the rte flow. */
 static struct rte_flow *
 bnxt_ulp_flow_create(struct rte_eth_dev *dev,
@@ -85,22 +108,23 @@ bnxt_ulp_flow_create(struct rte_eth_dev *dev,
struct bnxt_ulp_mapper_create_parms mapper_cparms = { 0 };
struct ulp_rte_parser_params params;
struct bnxt_ulp_context *ulp_ctx;
+   int rc, ret = BNXT_TF_RC_ERROR;
uint32_t class_id, act_tmpl;
struct rte_flow *flow_id;
+   uint16_t func_id;
uint32_t fid;
-   int ret = BNXT_TF_RC_ERROR;
 
if (bnxt_ulp_flow_validate_args(attr,
pattern, actions,
error) == BNXT_TF_RC_ERROR) {
BNXT_TF_DBG(ERR, "Invalid arguments being passed\n");
-   goto parse_error;
+   goto parse_err1;
}
 
ulp_ctx = bnxt_ulp_eth_dev_ptr2_cntxt_get(dev);
if (!ulp_ctx) {
BNXT_TF_DBG(ERR, "ULP context is not initialized\n");
-   goto parse_error;
+   goto parse_err1;
}
 
/* Initialize the parser params */
@@ -116,56 +140,72 @@ bnxt_ulp_flow_create(struct rte_eth_dev *dev,
ULP_COMP_FLD_IDX_WR(&params, BNXT_ULP_CF_IDX_SVIF_FLAG,
BNXT_ULP_INVALID_SVIF_VAL);
 
+   /* Get the function id */
+   if (ulp_port_db_port_func_id_get(ulp_ctx,
+dev->data->port_id,
+&func_id)) {
+   BNXT_TF_DBG(ERR, "conversion of port to func id failed\n");
+   goto parse_err1;
+   }
+
+   /* Protect flow creation */
+   if (bnxt_ulp_cntxt_acquire_fdb_lock(ulp_ctx)) {
+   BNXT_TF_DBG(ERR, "Flow db lock acquire failed\n");
+   goto parse_err1;
+   }
+
+   /* Allocate a Flow ID for attaching all resources for the flow to.
+* Once allocated, all errors have to walk the list of resources and
+* free each of them.
+*/
+   rc = ulp_flow_db_fid_alloc(ulp_ctx, BNXT_ULP_FDB_TYPE_REGULAR,
+  func_id, &fid);
+   if (rc) {
+   BNXT_TF_DBG(ERR, "Unable to allocate flow table entry\n");
+   goto parse_err2;
+   }
+
/* Parse the rte flow pattern */
ret = bnxt_ulp_rte_parser_hdr_parse(pattern, &params);
if (ret != BNXT_TF_RC_SUCCESS)
-   goto parse_error;
+   goto parse_err3;
 
/* Parse the rte flow action */
ret = bnxt_ulp_rte_parser_act_parse(actions, &params);
if (ret != BNXT_TF_RC_SUCCESS)
-   goto parse_error;
+   goto parse_err3;
 
/* Perform the rte flow post process */
ret = bnxt_ulp_rte_parser_post_process(&params);

[dpdk-dev] [PATCH v4 11/15] net/bnxt: add VXLAN decap offload support

2020-10-26 Thread Ajit Khaparde
From: Venkat Duvvuru 

VXLAN decap offload can happen in stages. The offload request may
not come as a single flow request rather may come as two flow offload
requests F1 & F2. This patch is adding support for this two stage
offload design. The match criteria for F1 is O_DMAC, O_SMAC,
O_DST_IP, O_UDP_DPORT and actions are COUNT, MARK, JUMP. The match
criteria for F2 is O_SRC_IP, O_DST_IP, VNI and inner header fields.
F1 and F2 flow offload requests can come in any order. If F2 flow
offload request comes first then F2 can’t be offloaded as there is
no O_DMAC information in F2. In this case, F2 will be deferred until
F1 flow offload request arrives. When F1 flow offload request is
received it will have O_DMAC information. Using F1’s O_DMAC, driver
creates an L2 context entry in the hardware as part of offloading F1.
F2 will now use F1’s O_DMAC to get the L2 context id associated with
this O_DMAC and other flow fields that are cached already at the time
of deferring F2 for offloading. F2s that arrive after F1 is offloaded
will be directly programmed and not cached.

Signed-off-by: Venkat Duvvuru 
Reviewed-by: Kishore Padmanabha 
Reviewed-by: Ajit Khaparde 
---
 doc/guides/nics/bnxt.rst  |  18 +
 doc/guides/rel_notes/release_20_11.rst|   1 +
 drivers/net/bnxt/meson.build  |   1 +
 drivers/net/bnxt/tf_ulp/bnxt_tf_common.h  |   4 +-
 drivers/net/bnxt/tf_ulp/bnxt_ulp.c|  10 +
 drivers/net/bnxt/tf_ulp/bnxt_ulp.h|  12 +
 drivers/net/bnxt/tf_ulp/bnxt_ulp_flow.c   |  84 ++---
 drivers/net/bnxt/tf_ulp/ulp_flow_db.c | 149 +++--
 drivers/net/bnxt/tf_ulp/ulp_flow_db.h |   2 +
 drivers/net/bnxt/tf_ulp/ulp_mapper.c  |   1 +
 drivers/net/bnxt/tf_ulp/ulp_mapper.h  |   2 +
 drivers/net/bnxt/tf_ulp/ulp_rte_parser.c  |  75 -
 drivers/net/bnxt/tf_ulp/ulp_rte_parser.h  |   4 +-
 .../net/bnxt/tf_ulp/ulp_template_db_enum.h|   4 +-
 drivers/net/bnxt/tf_ulp/ulp_template_struct.h |   7 +
 drivers/net/bnxt/tf_ulp/ulp_tun.c | 310 ++
 drivers/net/bnxt/tf_ulp/ulp_tun.h |  92 ++
 17 files changed, 694 insertions(+), 82 deletions(-)
 create mode 100644 drivers/net/bnxt/tf_ulp/ulp_tun.c
 create mode 100644 drivers/net/bnxt/tf_ulp/ulp_tun.h

diff --git a/doc/guides/nics/bnxt.rst b/doc/guides/nics/bnxt.rst
index 2540ddd5c2..bf2ef19adb 100644
--- a/doc/guides/nics/bnxt.rst
+++ b/doc/guides/nics/bnxt.rst
@@ -703,6 +703,24 @@ Notes
   flows to be directed to one or more queues associated with the VNIC id.
   This implementation is supported only when TRUFLOW functionality is disabled.
 
+- An application can issue a VXLAN decap offload request using rte_flow API
+  either as a single rte_flow request or a combination of two stages.
+  The PMD currently supports the two stage offload design.
+  In this approach the offload request may come as two flow offload requests
+  Flow1 & Flow2.  The match criteria for Flow1 is O_DMAC, O_SMAC, O_DST_IP,
+  O_UDP_DPORT and actions are COUNT, MARK, JUMP. The match criteria for Flow2
+  is O_SRC_IP, O_DST_IP, VNI and inner header fields.
+  Flow1 and Flow2 flow offload requests can come in any order. If Flow2 flow
+  offload request comes first then Flow2 can’t be offloaded as there is
+  no O_DMAC information in Flow2. In this case, Flow2 will be deferred until
+  Flow1 flow offload request arrives. When Flow1 flow offload request is
+  received it will have O_DMAC information. Using Flow1’s O_DMAC, driver
+  creates an L2 context entry in the hardware as part of offloading Flow1.
+  Flow2 will now use Flow1’s O_DMAC to get the L2 context id associated with
+  this O_DMAC and other flow fields that are cached already at the time
+  of deferring Flow2 for offloading. Flow2 that arrive after Flow1 is offloaded
+  will be directly programmed and not cached.
+
 Note: A VNIC represents a virtual interface in the hardware. It is a resource
 in the RX path of the chip and is used to setup various target actions such as
 RSS, MAC filtering etc. for the physical function in use.
diff --git a/doc/guides/rel_notes/release_20_11.rst 
b/doc/guides/rel_notes/release_20_11.rst
index edbcaf170b..471c670317 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -148,6 +148,7 @@ New Features
   * Updated HWRM structures to 1.10.1.70 version.
   * Added TRUFLOW support for Stingray devices.
   * Added support for representors on MAIA cores of SR.
+  * Added support for VXLAN decap offload using rte_flow.
 
 * **Updated Cisco enic driver.**
 
diff --git a/drivers/net/bnxt/meson.build b/drivers/net/bnxt/meson.build
index 9c153c402b..2896337b5d 100644
--- a/drivers/net/bnxt/meson.build
+++ b/drivers/net/bnxt/meson.build
@@ -64,6 +64,7 @@ sources = files('bnxt_cpr.c',
'tf_ulp/ulp_port_db.c',
'tf_ulp/ulp_def_rules.c',
'tf_ulp/ulp_fc_mgr.c',
+   'tf_ulp/ulp_tun.c',
'tf_ulp/ulp_template

[dpdk-dev] [PATCH v4 12/15] net/bnxt: increase the size of Rx CQ

2020-10-26 Thread Ajit Khaparde
LRO aka TPA and jumbo frame support uses aggregation ring for placing
Rx buffers. These features can generate multiple Rx completions for a
single Rx packet. Increase size of Rx Completion Queue to handle TPA
and aggregation ring events.

Fixes: daef48efe5e5 ("net/bnxt: support set MTU")
Cc: sta...@dpdk.org

Signed-off-by: Ajit Khaparde 
Reviewed-by: Qingmin Liu 
Reviewed-by: Randy Schacher 
---
 drivers/net/bnxt/bnxt_ring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt_ring.h b/drivers/net/bnxt/bnxt_ring.h
index daf9804956..3d81f610c1 100644
--- a/drivers/net/bnxt/bnxt_ring.h
+++ b/drivers/net/bnxt/bnxt_ring.h
@@ -27,7 +27,7 @@
 #define DEFAULT_RX_RING_SIZE   256
 #define DEFAULT_TX_RING_SIZE   256
 
-#define AGG_RING_SIZE_FACTOR   2
+#define AGG_RING_SIZE_FACTOR   4
 #define AGG_RING_MULTIPLIER2
 
 /* These assume 4k pages */
-- 
2.21.1 (Apple Git-122.3)



[dpdk-dev] [PATCH v4 13/15] net/bnxt: fix to reset mbuf data offset

2020-10-26 Thread Ajit Khaparde
Reset mbuf->data_off before handing the Rx packet to the application.
We were not doing this in the TPA path. It can cause applications
using this field for post processing to work incorrectly.

Fixes: 0958d8b6435d ("net/bnxt: support LRO")
Cc: sta...@dpdk.org

Signed-off-by: Ajit Khaparde 
Reviewed-by: Lance Richardson 
---
 drivers/net/bnxt/bnxt_rxr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 039217fa60..e41833cc43 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -150,6 +150,7 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
tpa_info->mbuf = mbuf;
tpa_info->len = rte_le_to_cpu_32(tpa_start->len);
 
+   mbuf->data_off = RTE_PKTMBUF_HEADROOM;
mbuf->nb_segs = 1;
mbuf->next = NULL;
mbuf->pkt_len = rte_le_to_cpu_32(tpa_start->len);
-- 
2.21.1 (Apple Git-122.3)



[dpdk-dev] [PATCH v4 14/15] net/bnxt: set thread safe flow ops flag

2020-10-26 Thread Ajit Khaparde
PMD supports thread-safe flow operations. Set the
RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE dev_flag to indicate this info
to the application. rte_flow API functions can avoid using its
own mutex for safe multi-thread flow handling.

Signed-off-by: Ajit Khaparde 
---
 doc/guides/nics/bnxt.rst   | 2 ++
 doc/guides/rel_notes/release_20_11.rst | 1 +
 drivers/net/bnxt/bnxt_ethdev.c | 6 ++
 3 files changed, 9 insertions(+)

diff --git a/doc/guides/nics/bnxt.rst b/doc/guides/nics/bnxt.rst
index bf2ef19adb..b38fc0b330 100644
--- a/doc/guides/nics/bnxt.rst
+++ b/doc/guides/nics/bnxt.rst
@@ -721,6 +721,8 @@ Notes
   of deferring Flow2 for offloading. Flow2 that arrive after Flow1 is offloaded
   will be directly programmed and not cached.
 
+- PMD supports thread-safe rte_flow operations.
+
 Note: A VNIC represents a virtual interface in the hardware. It is a resource
 in the RX path of the chip and is used to setup various target actions such as
 RSS, MAC filtering etc. for the physical function in use.
diff --git a/doc/guides/rel_notes/release_20_11.rst 
b/doc/guides/rel_notes/release_20_11.rst
index 471c670317..367ccb3248 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -149,6 +149,7 @@ New Features
   * Added TRUFLOW support for Stingray devices.
   * Added support for representors on MAIA cores of SR.
   * Added support for VXLAN decap offload using rte_flow.
+  * Added support to indicate native rte_flow API thread safety.
 
 * **Updated Cisco enic driver.**
 
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index a0e01d059d..71ad05dfe9 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3824,6 +3824,12 @@ bnxt_filter_ctrl_op(struct rte_eth_dev *dev,
case RTE_ETH_FILTER_GENERIC:
if (filter_op != RTE_ETH_FILTER_GET)
return -EINVAL;
+
+   /* PMD supports thread-safe flow operations.  rte_flow API
+* functions can avoid mutex for multi-thread safety.
+*/
+   dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE;
+
if (BNXT_TRUFLOW_EN(bp))
*(const void **)arg = &bnxt_ulp_rte_flow_ops;
else
-- 
2.21.1 (Apple Git-122.3)



[dpdk-dev] [PATCH v4 15/15] net/bnxt: fix Rx performance by removing spinlock

2020-10-26 Thread Ajit Khaparde
From: Rahul Gupta 

The spinlock was trying to protect scenarios where rx_queue stop/start
could be initiated dynamically. Assigning bnxt_dummy_recv_pkts and
bnxt_dummy_xmit_pkts immediately to avoid concurrent access of mbuf in Rx
and cleanup path should help achieve the same result.

Fixes: 14255b351537 ("net/bnxt: fix queue start/stop operations")

Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
Signed-off-by: Rahul Gupta 
---
 drivers/net/bnxt/bnxt.h |  4 
 drivers/net/bnxt/bnxt_cpr.c | 12 
 drivers/net/bnxt/bnxt_cpr.h |  1 +
 drivers/net/bnxt/bnxt_rxq.c |  4 
 drivers/net/bnxt/bnxt_rxq.h |  3 ---
 drivers/net/bnxt/bnxt_rxr.c |  5 +
 drivers/net/bnxt/bnxt_rxr.h |  2 --
 drivers/net/bnxt/bnxt_txr.h |  2 --
 8 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 57178192d2..90ced972c0 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -890,6 +890,10 @@ void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 uint16_t bnxt_rss_hash_tbl_size(const struct bnxt *bp);
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev,
int wait_to_complete);
+uint16_t bnxt_dummy_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+uint16_t bnxt_dummy_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
 
 extern const struct rte_flow_ops bnxt_flow_ops;
 
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 91d1ffe46c..ee96ae81bf 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -121,6 +121,12 @@ void bnxt_handle_async_event(struct bnxt *bp,
PMD_DRV_LOG(INFO, "Port conn async event\n");
break;
case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
+   /*
+* Avoid any rx/tx packet processing during firmware reset
+* operation.
+*/
+   bnxt_stop_rxtx(bp);
+
/* Ignore reset notify async events when stopping the port */
if (!bp->eth_dev->data->dev_started) {
bp->flags |= BNXT_FLAG_FATAL_ERROR;
@@ -337,3 +343,9 @@ bool bnxt_is_recovery_enabled(struct bnxt *bp)
 
return false;
 }
+
+void bnxt_stop_rxtx(struct bnxt *bp)
+{
+   bp->eth_dev->rx_pkt_burst = &bnxt_dummy_recv_pkts;
+   bp->eth_dev->tx_pkt_burst = &bnxt_dummy_xmit_pkts;
+}
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index cccd6cdbe0..ff9697f4c8 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -126,4 +126,5 @@ void bnxt_wait_for_device_shutdown(struct bnxt *bp);
 bool bnxt_is_recovery_enabled(struct bnxt *bp);
 bool bnxt_is_master_func(struct bnxt *bp);
 
+void bnxt_stop_rxtx(struct bnxt *bp);
 #endif
diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index 78514143e5..e0ec342162 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -210,8 +210,6 @@ void bnxt_rx_queue_release_mbufs(struct bnxt_rx_queue *rxq)
if (!rxq || !rxq->rx_ring)
return;
 
-   rte_spinlock_lock(&rxq->lock);
-
sw_ring = rxq->rx_ring->rx_buf_ring;
if (sw_ring) {
for (i = 0;
@@ -248,7 +246,6 @@ void bnxt_rx_queue_release_mbufs(struct bnxt_rx_queue *rxq)
}
}
 
-   rte_spinlock_unlock(&rxq->lock);
 }
 
 void bnxt_free_rx_mbufs(struct bnxt *bp)
@@ -389,7 +386,6 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
rxq->rx_started = true;
}
eth_dev->data->rx_queue_state[queue_idx] = queue_state;
-   rte_spinlock_init(&rxq->lock);
 
/* Configure mtu if it is different from what was configured before */
if (!queue_idx)
diff --git a/drivers/net/bnxt/bnxt_rxq.h b/drivers/net/bnxt/bnxt_rxq.h
index 201bda2269..c72105cf06 100644
--- a/drivers/net/bnxt/bnxt_rxq.h
+++ b/drivers/net/bnxt/bnxt_rxq.h
@@ -16,9 +16,6 @@ struct bnxt;
 struct bnxt_rx_ring_info;
 struct bnxt_cp_ring_info;
 struct bnxt_rx_queue {
-   rte_spinlock_t  lock;   /* Synchronize between rx_queue_stop
-* and fast path
-*/
struct rte_mempool  *mb_pool; /* mbuf pool for RX ring */
uint64_tmbuf_initializer; /* val to init mbuf */
uint16_tnb_rx_desc; /* num of RX desc */
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index e41833cc43..4a8326e335 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -843,8 +843,7 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf 
**rx_pkts,
return 0;
 
/* If Rx Q was stopped return */
-   if (unlikely(!rxq->rx_started ||
-!rte_spinlock_trylock(&rxq->lock)))
+

Re: [dpdk-dev] [PATCH v4 1/3] net/ice: add AVX512 vector path

2020-10-26 Thread David Marchand
On Mon, Oct 26, 2020 at 8:13 AM Rong, Leyi  wrote:
> Would prefer using __AVX512F__ instead of RTE_MACHINE_CPUFLAG_AVX512F here 
> rather than remove the RTE_MACHINE_CPUFLAG_ macro directly to check the CPU 
> capability.
> So the judgment statement will be
> if cc.get_define('__AVX512F__', args: machine_args) != '' or (not 
> machine_args.contains('-mno-avx512f') and cc.has_argument('-mavx512f'))
>
> what do you think?

No opinion as I have yet to understand the subtleties to control
enablement of avx stuff.
Bruce?


-- 
David Marchand



Re: [dpdk-dev] [PATCH] net/bnxt: fix incorrect boolean operator usage

2020-10-26 Thread Ajit Khaparde
On Thu, Oct 22, 2020 at 11:45 AM Lance Richardson
 wrote:
>
> Use boolean AND operator instead of bitwise operator.
>
> Coverity issue: 323488
> Fixes: b42c15c83e88 ("net/bnxt: support trusted VF")
> Signed-off-by: Lance Richardson 
> Reviewed-by: Ajit Khaparde 
Patch applied to dpdk-next-net-brcm.

> ---
>  drivers/net/bnxt/bnxt_ethdev.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
> index aa2d9e00e..3ee4b5524 100644
> --- a/drivers/net/bnxt/bnxt_ethdev.c
> +++ b/drivers/net/bnxt/bnxt_ethdev.c
> @@ -1547,7 +1547,7 @@ static int bnxt_mac_addr_add_op(struct rte_eth_dev 
> *eth_dev,
> if (rc)
> return rc;
>
> -   if (BNXT_VF(bp) & !BNXT_VF_IS_TRUSTED(bp)) {
> +   if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp)) {
> PMD_DRV_LOG(ERR, "Cannot add MAC address to a VF 
> interface\n");
> return -ENOTSUP;
> }
> --
> 2.25.1
>


Re: [dpdk-dev] [PATCH] net/bnxt: use shorter SIMD initializers

2020-10-26 Thread Ajit Khaparde
On Thu, Oct 22, 2020 at 11:51 AM Lance Richardson
 wrote:
>
> Make SIMD initialization code less verbose by using appropriate
> intrinsics when all lanes of a vector are initialized to the
> same value.
>
> Signed-off-by: Lance Richardson 
> Reviewed-by: Ajit Khaparde 
Patch applied to dpdk-next-net-brcm.

> ---
>  drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 58 +++
>  drivers/net/bnxt/bnxt_rxtx_vec_sse.c  | 37 +
>  2 files changed, 23 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c 
> b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> index f49e29ccb..de1d96570 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> @@ -67,40 +67,17 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t 
> mm_rxcmp1[4],
> 0xFF, 0xFF,/* vlan_tci (zeroes) */
> 12, 13, 14, 15 /* rss hash */
> };
> -   const uint32x4_t flags_type_mask = {
> -   RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> -   RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> -   RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> -   RX_PKT_CMPL_FLAGS_ITYPE_MASK
> -   };
> -   const uint32x4_t flags2_mask1 = {
> -   RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> -   RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> -   RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> -   RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> -   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
> -   };
> -   const uint32x4_t flags2_mask2 = {
> -   RX_PKT_CMPL_FLAGS2_IP_TYPE,
> -   RX_PKT_CMPL_FLAGS2_IP_TYPE,
> -   RX_PKT_CMPL_FLAGS2_IP_TYPE,
> -   RX_PKT_CMPL_FLAGS2_IP_TYPE
> -   };
> -   const uint32x4_t rss_mask = {
> -   RX_PKT_CMPL_FLAGS_RSS_VALID,
> -   RX_PKT_CMPL_FLAGS_RSS_VALID,
> -   RX_PKT_CMPL_FLAGS_RSS_VALID,
> -   RX_PKT_CMPL_FLAGS_RSS_VALID
> -   };
> -   const uint32x4_t flags2_index_mask = {
> -   0x1F, 0x1F, 0x1F, 0x1F
> -   };
> -   const uint32x4_t flags2_error_mask = {
> -   0xF, 0xF, 0xF, 0xF
> -   };
> +   const uint32x4_t flags_type_mask =
> +   vdupq_n_u32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> +   const uint32x4_t flags2_mask1 =
> +   vdupq_n_u32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> +   RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
> +   const uint32x4_t flags2_mask2 =
> +   vdupq_n_u32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
> +   const uint32x4_t rss_mask =
> +   vdupq_n_u32(RX_PKT_CMPL_FLAGS_RSS_VALID);
> +   const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F);
> +   const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F);
> uint32x4_t flags_type, flags2, index, errors, rss_flags;
> uint32x4_t tmp, ptype_idx;
> uint64x2_t t0, t1;
> @@ -180,20 +157,13 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf 
> **rx_pkts,
> uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
> struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
> uint64_t valid, desc_valid_mask = ~0UL;
> -   const uint32x4_t info3_v_mask = {
> -   CMPL_BASE_V, CMPL_BASE_V,
> -   CMPL_BASE_V, CMPL_BASE_V
> -   };
> +   const uint32x4_t info3_v_mask = vdupq_n_u32(CMPL_BASE_V);
> uint32_t raw_cons = cpr->cp_raw_cons;
> uint32_t cons, mbcons;
> int nb_rx_pkts = 0;
> const uint64x2_t mb_init = {rxq->mbuf_initializer, 0};
> -   const uint32x4_t valid_target = {
> -   !!(raw_cons & cp_ring_size),
> -   !!(raw_cons & cp_ring_size),
> -   !!(raw_cons & cp_ring_size),
> -   !!(raw_cons & cp_ring_size)
> -   };
> +   const uint32x4_t valid_target =
> +   vdupq_n_u32(!!(raw_cons & cp_ring_size));
> int i;
>
> /* If Rx Q was stopped return */
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c 
> b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> index e4ba63551..e12bf8bb7 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> @@ -63,29 +63,14 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
>  0xFF, 0xFF, 3, 2,/* pkt_len */
>  0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
> const __m128i flags_type_mask =
> -   _mm_set_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> +   _mm_set1_epi32(RX_PKT

Re: [dpdk-dev] [PATCH] net/bnxt: update PMD supported features

2020-10-26 Thread Ajit Khaparde
On Thu, Oct 22, 2020 at 1:20 PM Lance Richardson
 wrote:
>
> Mark "BSD nic_uio", "Usage doc", and "Perf doc" as supported
> for the bnxt PMD.
>
> Signed-off-by: Lance Richardson 
> Reviewed-by: Ajit Kumar Khaparde 
Patch applied to dpdk-next-net-brcm.

> ---
>  doc/guides/nics/features/bnxt.ini | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/doc/guides/nics/features/bnxt.ini 
> b/doc/guides/nics/features/bnxt.ini
> index e75cfc44d..f8a7fd9a1 100644
> --- a/doc/guides/nics/features/bnxt.ini
> +++ b/doc/guides/nics/features/bnxt.ini
> @@ -45,8 +45,11 @@ FW version   = Y
>  EEPROM dump  = Y
>  LED  = Y
>  Multiprocess aware   = Y
> +BSD nic_uio  = Y
>  Linux UIO= Y
>  Linux VFIO   = Y
>  ARMv8= Y
>  x86-32   = Y
>  x86-64   = Y
> +Usage doc= Y
> +Perf doc = Y
> --
> 2.25.1
>


[dpdk-dev] [PATCH v4] examples/tep_term: deprecate this example

2020-10-26 Thread Xiaoyun Li
This example sets up a scenario that VXLAN packets can be received
by different PF queues based on VNID and each queue is bound to a VM
with a VNID so that the VM can receive its inner packets.

Usually, OVS is used to do the software encap/decap for VXLAN packets.

And the VXLAN packets offloading can be replaced with flow rules in
testpmd like Chapter "Sample VXLAN flow rules" in Testpmd Application
User Guide.

And this example hasn't been used for a long time.

So deprecate this example.

Signed-off-by: Xiaoyun Li 
Acked-by: Thomas Monjalon 
Acked-by: Andrew Rybchenko 
Acked-by: Ferruh Yigit 
---
v4:
 * Rebased to the newest dpdk branch.
v3:
 * Added release note in removed items.
v2:
 * Polished the commit log.
 * Added the doc for testpmd VXLAN flow rules which can replace the
 * offload that this example wanted.
---
 MAINTAINERS   |4 -
 doc/guides/rel_notes/release_20_11.rst|2 +
 .../img/tep_termination_arch.svg  | 1400 -
 doc/guides/sample_app_ug/index.rst|1 -
 doc/guides/sample_app_ug/tep_termination.rst  |  214 ---
 doc/guides/testpmd_app_ug/testpmd_funcs.rst   |   43 +
 examples/meson.build  |2 +-
 examples/tep_termination/Makefile |   45 -
 examples/tep_termination/main.c   | 1235 ---
 examples/tep_termination/main.h   |   93 --
 examples/tep_termination/meson.build  |   16 -
 examples/tep_termination/vxlan.c  |  243 ---
 examples/tep_termination/vxlan.h  |   57 -
 examples/tep_termination/vxlan_setup.c|  443 --
 examples/tep_termination/vxlan_setup.h|   58 -
 15 files changed, 46 insertions(+), 3810 deletions(-)
 delete mode 100644 doc/guides/sample_app_ug/img/tep_termination_arch.svg
 delete mode 100644 doc/guides/sample_app_ug/tep_termination.rst
 delete mode 100644 examples/tep_termination/Makefile
 delete mode 100644 examples/tep_termination/main.c
 delete mode 100644 examples/tep_termination/main.h
 delete mode 100644 examples/tep_termination/meson.build
 delete mode 100644 examples/tep_termination/vxlan.c
 delete mode 100644 examples/tep_termination/vxlan.h
 delete mode 100644 examples/tep_termination/vxlan_setup.c
 delete mode 100644 examples/tep_termination/vxlan_setup.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 2b1d232f53..5b390d1d84 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1670,10 +1670,6 @@ M: John McNamara 
 F: examples/skeleton/
 F: doc/guides/sample_app_ug/skeleton.rst
 
-TEP termination example
-M: Xiaoyun Li 
-F: examples/tep_termination/
-
 VMDq examples
 F: examples/vmdq/
 F: doc/guides/sample_app_ug/vmdq_forwarding.rst
diff --git a/doc/guides/rel_notes/release_20_11.rst 
b/doc/guides/rel_notes/release_20_11.rst
index d8ac359e51..7d8f40c100 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -369,6 +369,8 @@ Removed Items
 
 * Removed Python 2 support since it was EOL'd in January 2020.
 
+* Removed TEP termination sample application.
+
 API Changes
 ---
 
diff --git a/doc/guides/sample_app_ug/img/tep_termination_arch.svg 
b/doc/guides/sample_app_ug/img/tep_termination_arch.svg
deleted file mode 100644
index 54f1655cdc..00
--- a/doc/guides/sample_app_ug/img/tep_termination_arch.svg
+++ /dev/null
@@ -1,1400 +0,0 @@
-
-
-
-http://schemas.microsoft.com/visio/2003/SVGExtensions/";
-   xmlns:dc="http://purl.org/dc/elements/1.1/";
-   xmlns:cc="http://creativecommons.org/ns#";
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
-   xmlns:svg="http://www.w3.org/2000/svg";
-   xmlns="http://www.w3.org/2000/svg";
-   xmlns:xlink="http://www.w3.org/1999/xlink";
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd";
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape";
-   width="4.9898949in"
-   height="6.1537709in"
-   viewBox="0 0 359.27243 443.07151"
-   xml:space="preserve"
-   class="st24"
-   id="svg2"
-   version="1.1"
-   inkscape:version="0.91 r13725"
-   sodipodi:docname="tep_termination_arch.svg"
-   
style="font-size:12px;overflow:visible;color-interpolation-filters:sRGB;fill:none;fill-rule:evenodd;stroke-linecap:square;stroke-miterlimit:3">image/svg+xmlhttp://purl.org/dc/dcmitype/StillImage"; 
/>

Re: [dpdk-dev] [PATCH v3 0/2] LPM changes

2020-10-26 Thread David Marchand
On Fri, Oct 23, 2020 at 11:39 AM David Marchand
 wrote:
>
> From Ruifeng Wang:
>
> The rte_lpm structure is exported because lookup API is inlined.
> But most of the structure can be hidden.
> Discussion at: http://patches.dpdk.org/patch/72403/
> This patch set aimed to hide the rte_lpm structure as much as possible.
>
> A data free issue was identified and fixed.

Series applied.
Thanks Ruifeng.


-- 
David Marchand



[dpdk-dev] [PATCH] common/mlx5: fix PCI driver name

2020-10-26 Thread Bing Zhao
In the refactor of mlx5 common layer, the PCI driver name to the RTE
device was changed from "net_mlx5" to "mlx5_pci". The string of name
"mlx5_pci" is used directly in the structure rte_pci_driver.

In the past, a macro "MLX5_DRIVER_NAME" is used instead of any direct
string, and now it is missing. The functions that use
"MLX5_DRIVER_NAME" will get some mismatch, e.g mlx5_eth_find_next.

It needs to use this macro again in all code to make everything get
aligned.

Fixes: 8a41f4deccc3 ("common/mlx5: introduce layer for multiple class drivers")
Cc: pa...@mellanox.com
Cc: sta...@dpdk.org

Signed-off-by: Bing Zhao 
Reviewed-by: Parav Pandit 
Acked-by: Matan Azrad 
---
 drivers/common/mlx5/mlx5_common.h | 2 ++
 drivers/common/mlx5/mlx5_common_pci.c | 2 +-
 drivers/net/mlx5/mlx5_defs.h  | 3 ---
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_common.h 
b/drivers/common/mlx5/mlx5_common.h
index ed44a45..3d3d109 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -18,6 +18,8 @@
 #include "mlx5_prm.h"
 #include "mlx5_devx_cmds.h"
 
+/* Reported driver name. */
+#define MLX5_DRIVER_NAME "mlx5_pci"
 
 /* Bit-field manipulation. */
 #define BITFIELD_DECLARE(bf, type, size) \
diff --git a/drivers/common/mlx5/mlx5_common_pci.c 
b/drivers/common/mlx5/mlx5_common_pci.c
index 02417c6..5208972 100644
--- a/drivers/common/mlx5/mlx5_common_pci.c
+++ b/drivers/common/mlx5/mlx5_common_pci.c
@@ -408,7 +408,7 @@ class_name_to_value(const char *class_name)
 
 static struct rte_pci_driver mlx5_pci_driver = {
.driver = {
-   .name = "mlx5_pci",
+   .name = MLX5_DRIVER_NAME,
},
.probe = mlx5_common_pci_probe,
.remove = mlx5_common_pci_remove,
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 42916ed..2657081 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -11,9 +11,6 @@
 
 #include "mlx5_autoconf.h"
 
-/* Reported driver name. */
-#define MLX5_DRIVER_NAME "net_mlx5"
-
 /* Maximum number of simultaneous VLAN filters. */
 #define MLX5_MAX_VLAN_IDS 128
 
-- 
1.8.3.1



[dpdk-dev] [PATCH v1 2/2] vdpa/mlx5: hardware error handling

2020-10-26 Thread Xueming Li
When hardware error happens, vdpa didn't get such information and leave
driver in silent: working state but no response.

This patch subscribes firmware virtq error event and try to recover max
3 times in 10 seconds, stop virtq if max retry number reached.

When error happens, PMD log in warning level. If failed to recover,
outputs error log. Query virtq statistics to get error counters report.

Acked-by: Matan Azrad 
Signed-off-by: Xueming Li 
---
 drivers/vdpa/mlx5/mlx5_vdpa.c   |   2 +
 drivers/vdpa/mlx5/mlx5_vdpa.h   |  37 
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 140 
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c |  61 +---
 4 files changed, 225 insertions(+), 15 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index a8f3e4b1de..ba779c10ee 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -283,6 +283,7 @@ mlx5_vdpa_dev_close(int vid)
}
if (priv->configured)
ret |= mlx5_vdpa_lm_log(priv);
+   mlx5_vdpa_err_event_unset(priv);
mlx5_vdpa_cqe_event_unset(priv);
mlx5_vdpa_steer_unset(priv);
mlx5_vdpa_virtqs_release(priv);
@@ -318,6 +319,7 @@ mlx5_vdpa_dev_config(int vid)
DRV_LOG(WARNING, "MTU cannot be set on device %s.",
vdev->device->name);
if (mlx5_vdpa_pd_create(priv) || mlx5_vdpa_mem_register(priv) ||
+   mlx5_vdpa_err_event_setup(priv) ||
mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
mlx5_vdpa_cqe_event_setup(priv)) {
mlx5_vdpa_dev_close(vid);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index fcbc12ab0c..0d6886c52c 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -87,6 +87,7 @@ struct mlx5_vdpa_virtq {
uint16_t vq_size;
uint8_t notifier_state;
bool stopped;
+   uint32_t version;
struct mlx5_vdpa_priv *priv;
struct mlx5_devx_obj *virtq;
struct mlx5_devx_obj *counters;
@@ -97,6 +98,8 @@ struct mlx5_vdpa_virtq {
uint32_t size;
} umems[3];
struct rte_intr_handle intr_handle;
+   uint64_t err_time[3]; /* RDTSC time of recent errors. */
+   uint32_t n_retry;
struct mlx5_devx_virtio_q_couners_attr reset;
 };
 
@@ -143,8 +146,10 @@ struct mlx5_vdpa_priv {
struct rte_vhost_memory *vmem;
uint32_t eqn;
struct mlx5dv_devx_event_channel *eventc;
+   struct mlx5dv_devx_event_channel *err_chnl;
struct mlx5dv_devx_uar *uar;
struct rte_intr_handle intr_handle;
+   struct rte_intr_handle err_intr_handle;
struct mlx5_devx_obj *td;
struct mlx5_devx_obj *tis;
uint16_t nr_virtqs;
@@ -259,6 +264,25 @@ int mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv);
  */
 void mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv);
 
+/**
+ * Setup error interrupt handler.
+ *
+ * @param[in] priv
+ *   The vdpa driver private structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int mlx5_vdpa_err_event_setup(struct mlx5_vdpa_priv *priv);
+
+/**
+ * Unset error event handler.
+ *
+ * @param[in] priv
+ *   The vdpa driver private structure.
+ */
+void mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv);
+
 /**
  * Release a virtq and all its related resources.
  *
@@ -392,6 +416,19 @@ int mlx5_vdpa_virtq_modify(struct mlx5_vdpa_virtq *virtq, 
int state);
  */
 int mlx5_vdpa_virtq_stop(struct mlx5_vdpa_priv *priv, int index);
 
+/**
+ * Query virtq information.
+ *
+ * @param[in] priv
+ *   The vdpa driver private structure.
+ * @param[in] index
+ *   The virtq index.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int mlx5_vdpa_virtq_query(struct mlx5_vdpa_priv *priv, int index);
+
 /**
  * Get virtq statistics.
  *
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c 
b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 8a01e42794..89df699dad 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -15,11 +15,14 @@
 #include 
 
 #include 
+#include 
 
 #include "mlx5_vdpa_utils.h"
 #include "mlx5_vdpa.h"
 
 
+#define MLX5_VDPA_ERROR_TIME_SEC 3u
+
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -378,6 +381,143 @@ mlx5_vdpa_interrupt_handler(void *cb_arg)
pthread_mutex_unlock(&priv->vq_config_lock);
 }
 
+static void
+mlx5_vdpa_err_interrupt_handler(void *cb_arg __rte_unused)
+{
+#ifdef HAVE_IBV_DEVX_EVENT
+   struct mlx5_vdpa_priv *priv = cb_arg;
+   union {
+   struct mlx5dv_devx_async_event_hdr event_resp;
+   uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
+   } out;
+   uint32_t vq_index, i, version;
+   struct mlx5_vdpa_virtq *virtq;
+   uint64_t sec;
+
+   pthread_mutex_lock(&priv->vq_config_lock);
+  

[dpdk-dev] [PATCH v1 1/2] common/mlx5: add virtq attributes error fields

2020-10-26 Thread Xueming Li
Add the needed fields for virtq DevX object to read the error state.

Acked-by: Matan Azrad 
Signed-off-by: Xueming Li 
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
 drivers/common/mlx5/mlx5_prm.h   | 9 +++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c 
b/drivers/common/mlx5/mlx5_devx_cmds.c
index 8aee12d527..dc426e9b09 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1754,6 +1754,9 @@ mlx5_devx_cmd_query_virtq(struct mlx5_devx_obj *virtq_obj,
attr->hw_available_index = MLX5_GET16(virtio_net_q, virtq,
  hw_available_index);
attr->hw_used_index = MLX5_GET16(virtio_net_q, virtq, hw_used_index);
+   attr->state = MLX5_GET16(virtio_net_q, virtq, state);
+   attr->error_type = MLX5_GET16(virtio_net_q, virtq,
+ virtio_q_context.error_type);
return ret;
 }
 
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h 
b/drivers/common/mlx5/mlx5_devx_cmds.h
index abbea67784..0ea2427b75 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -298,6 +298,7 @@ struct mlx5_devx_virtq_attr {
uint32_t size;
uint64_t offset;
} umems[3];
+   uint8_t error_type;
 };
 
 
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index d342263c85..7d671a3996 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -2280,7 +2280,8 @@ struct mlx5_ifc_virtio_q_bits {
u8 used_addr[0x40];
u8 available_addr[0x40];
u8 virtio_q_mkey[0x20];
-   u8 reserved_at_160[0x20];
+   u8 reserved_at_160[0x18];
+   u8 error_type[0x8];
u8 umem_1_id[0x20];
u8 umem_1_size[0x20];
u8 umem_1_offset[0x40];
@@ -2308,7 +2309,7 @@ struct mlx5_ifc_virtio_net_q_bits {
u8 vhost_log_page[0x5];
u8 reserved_at_90[0xc];
u8 state[0x4];
-   u8 error_type[0x8];
+   u8 reserved_at_a0[0x8];
u8 tisn_or_qpn[0x18];
u8 dirty_bitmap_mkey[0x20];
u8 dirty_bitmap_size[0x20];
@@ -2329,6 +2330,10 @@ struct mlx5_ifc_query_virtq_out_bits {
struct mlx5_ifc_virtio_net_q_bits virtq;
 };
 
+enum {
+   MLX5_EVENT_TYPE_OBJECT_CHANGE = 0x27,
+};
+
 enum {
MLX5_QP_ST_RC = 0x0,
 };
-- 
2.25.1



[dpdk-dev] [PATCH] net/mlx4: fix glue library name

2020-10-26 Thread Ali Alnubani
The MLX4 library wasn't being successfully initialized with
-Dibverbs_link=dlopen because it expected a shared object file
with a different name.

Fixes: a20b2c01a7a1 ("build: standardize component names and defines")
Cc: bruce.richard...@intel.com

Signed-off-by: Ali Alnubani 
---
 drivers/net/mlx4/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
index 404006515..c22a88875 100644
--- a/drivers/net/mlx4/meson.build
+++ b/drivers/net/mlx4/meson.build
@@ -10,7 +10,7 @@ endif
 
 static_ibverbs = (get_option('ibverbs_link') == 'static')
 dlopen_ibverbs = (get_option('ibverbs_link') == 'dlopen')
-LIB_GLUE_BASE = 'librte_pmd_mlx4_glue.so'
+LIB_GLUE_BASE = 'librte_net_mlx4_glue.so'
 LIB_GLUE_VERSION = '18.02.0'
 LIB_GLUE = LIB_GLUE_BASE + '.' + LIB_GLUE_VERSION
 if dlopen_ibverbs
-- 
2.28.0



[dpdk-dev] [PATCH] common/mlx5: fix glue library name

2020-10-26 Thread Ali Alnubani
The MLX5 glue library wasn't following the standard
'librte_<class>_<name>.so' naming convention.

Fixes: a20b2c01a7a1 ("build: standardize component names and defines")
Cc: bruce.richard...@intel.com

Signed-off-by: Ali Alnubani 
---
 drivers/common/mlx5/linux/meson.build | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/common/mlx5/linux/meson.build 
b/drivers/common/mlx5/linux/meson.build
index 9ef8e181d..0d437f8fb 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -5,7 +5,7 @@ includes += include_directories('.')
 
 static_ibverbs = (get_option('ibverbs_link') == 'static')
 dlopen_ibverbs = (get_option('ibverbs_link') == 'dlopen')
-LIB_GLUE_BASE = 'librte_pmd_mlx5_glue.so'
+LIB_GLUE_BASE = 'librte_common_mlx5_glue.so'
 LIB_GLUE_VERSION = '20.02.0'
 LIB_GLUE = LIB_GLUE_BASE + '.' + LIB_GLUE_VERSION
 if dlopen_ibverbs
@@ -195,7 +195,7 @@ configure_file(output : 'mlx5_autoconf.h', configuration : 
config)
 # Build Glue Library
 if dlopen_ibverbs
dlopen_name = 'mlx5_glue'
-   dlopen_lib_name = 'rte_pmd_@0@'.format(dlopen_name)
+   dlopen_lib_name = 'rte_common_' + dlopen_name
dlopen_so_version = LIB_GLUE_VERSION
dlopen_sources = files('mlx5_glue.c')
dlopen_install_dir = [ eal_pmd_path + '-glue' ]
-- 
2.28.0



Re: [dpdk-dev] [PATCH v2 2/6] net/mlx5: add support for two ports hairpin mode

2020-10-26 Thread Slava Ovsiienko
> -Original Message-
> From: Bing Zhao 
> Sent: Thursday, October 22, 2020 17:07
> To: viachesl...@mellanox.com; ma...@mellanox.com
> Cc: dev@dpdk.org; Ori Kam ; Raslan Darawsheh
> 
> Subject: [PATCH v2 2/6] net/mlx5: add support for two ports hairpin mode
> 
> In order to support hairpin between two ports, mlx5 PMD needs to implement
> the functions and provide them as the function pointers.
> 
> The bind and unbind functions are executed per port pairs. All the hairpin
> queues between the two ports should have the same attributes during queues
> setup. Different configurations among queue pairs from the same ports are not
> supported. It is allowed that two ports only have one direction hairpin.
> 
> In order to set up the connection between two queues, peer Rx queue HW
> information must be fetched via the internal RTE API and the queue
> information could be used to modify the SQ object. Then the RQ object will be
> modified with the Tx queue HW information. The reverse operation is not
> supported right now.
> 
> When disconnecting the queues pair, SQ and RQ object should be reset
> without any peer HW information. The unbinding operation will try to
> disconnect all Tx queues from the port from the Rx queues of the peer port.
> 
> Tx explicit mode attribute will be saved and used when creating a hairpin 
> flow.
> 
> Signed-off-by: Bing Zhao 
Acked-by: Viacheslav Ovsiienko 

> ---
>  drivers/net/mlx5/linux/mlx5_os.c |  10 +
>  drivers/net/mlx5/mlx5.h  |  19 ++
>  drivers/net/mlx5/mlx5_rxtx.h |   2 +
>  drivers/net/mlx5/mlx5_trigger.c  | 611
> ++-
>  4 files changed, 640 insertions(+), 2 deletions(-)
> 


Re: [dpdk-dev] [PATCH v2 1/6] net/mlx5: change hairpin queue peer checking

2020-10-26 Thread Slava Ovsiienko
> -Original Message-
> From: dev  On Behalf Of Bing Zhao
> Sent: Thursday, October 22, 2020 17:07
> To: viachesl...@mellanox.com; ma...@mellanox.com
> Cc: dev@dpdk.org; Ori Kam ; Raslan Darawsheh
> 
> Subject: [dpdk-dev] [PATCH v2 1/6] net/mlx5: change hairpin queue peer
> checking
> 
> In the current implementation of single port mode hairpin, the peer queue
> should belong to the same port of the current queue. When the two ports
> hairpin mode is introduced, such checking should be removed to make the
> hairpin queue setup execute successfully since it is not a valid condition
> anymore.
> 
> In the meanwhile, different devices could have different queue configurations.
> The queues number of peer port is unknown to the current device. The
> checking should be removed also.
> 
> If the Tx and Rx port IDs of a hairpin peer are different, only the manual
> binding and explicit Tx flows are supported. Or else, the four combinations of
> modes could be supported. The mode attributes consistency checking will be
> done when connecting the queue with its peer queue.
> 
> Signed-off-by: Bing Zhao 
Acked-by: Viacheslav Ovsiienko 

> ---
>  drivers/net/mlx5/mlx5_rxq.c | 23 +--
> drivers/net/mlx5/mlx5_txq.c | 23 +--
>  2 files changed, 34 insertions(+), 12 deletions(-)


Re: [dpdk-dev] [PATCH v2 3/6] net/mlx5: add support to get hairpin peer ports

2020-10-26 Thread Slava Ovsiienko
> -Original Message-
> From: Bing Zhao 
> Sent: Thursday, October 22, 2020 17:07
> To: viachesl...@mellanox.com; ma...@mellanox.com
> Cc: dev@dpdk.org; Ori Kam ; Raslan Darawsheh
> 
> Subject: [PATCH v2 3/6] net/mlx5: add support to get hairpin peer ports
> 
> In real-life business, one device could be attached and detached dynamically.
> The hairpin configuration of this port to/from all the other ports should be
> enabled and disabled accordingly.
> 
> The RTE ethdev lib and PMD should provide this ability to get the peer ports
> list in case that the application doesn't save it. It is recommended that the 
> size
> of the array to save the port IDs is as large as the "RTE_MAX_ETHPORTS" to
> have the maximal capacity.
> 
> The order of the peer port IDs may be different from that during hairpin
> queues set in the initialization stage. The peer port ID could be the same as 
> the
> current device port ID when the hairpin peer ports contain itself - the single
> port hairpin.
> 
> The application should check the ports' status and decide if the peer port
> should be bound / unbound when starting / stopping the current device.
> 
> Signed-off-by: Bing Zhao 
Acked-by: Viacheslav Ovsiienko 

> ---
>  drivers/net/mlx5/linux/mlx5_os.c |  2 +
>  drivers/net/mlx5/mlx5.h  |  2 +
>  drivers/net/mlx5/mlx5_trigger.c  | 89
> 
>  3 files changed, 93 insertions(+)
> 


Re: [dpdk-dev] [PATCH v2 5/6] net/mlx5: change hairpin ingress flow validation

2020-10-26 Thread Slava Ovsiienko
> -Original Message-
> From: Bing Zhao 
> Sent: Thursday, October 22, 2020 17:07
> To: viachesl...@mellanox.com; ma...@mellanox.com
> Cc: dev@dpdk.org; Ori Kam ; Raslan Darawsheh
> 
> Subject: [PATCH v2 5/6] net/mlx5: change hairpin ingress flow validation
> 
> In the current implementation of the single port hairpin, there is an implicit
> splitting process for actions. When inserting a hairpin flow, all the actions 
> will
> be included with the ingress attribute.
> The flow engine will check and decide which actions should be moved into the
> TX flow part, e.g., encapsulation, VLAN push.
> 
> In some NICs, some actions can only be done in one direction. Since the
> hairpin flow will be split into two parts, such validation will be skipped.
> 
> With the hairpin explicit TX flow mode, no splitting is needed any more. The
> hairpin flow may have no big difference from a standard flow (except the
> queue). The application should take full charge of the actions and the flow
> engine should validate the hairpin flow in the same way as other flows.
> 
> In the meanwhile, a new internal API is added to get the hairpin 
> configuration.
> This will bypass the useless atomic operation to save the CPU cycles.
> 
> Signed-off-by: Bing Zhao 
Acked-by: Viacheslav Ovsiienko 

> ---
>  drivers/net/mlx5/mlx5_flow_dv.c | 15 ---
>  drivers/net/mlx5/mlx5_rxq.c | 27 +++
>  drivers/net/mlx5/mlx5_rxtx.h|  2 ++
>  3 files changed, 41 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c index 15cd34e..d5be6f0 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -6058,11 +6058,17 @@ struct field_modify_info modify_tcp[] = {
> actions,
> "no fate action is found");
>   }
> - /* Continue validation for Xcap and VLAN actions.*/
> + /*
> +  * Continue validation for Xcap and VLAN actions.
> +  * If hairpin is working in explicit TX rule mode, there is no actions
> +  * splitting and the validation of hairpin ingress flow should be the
> +  * same as other standard flows.
> +  */
>   if ((action_flags & (MLX5_FLOW_XCAP_ACTIONS |
>MLX5_FLOW_VLAN_ACTIONS)) &&
>   (queue_index == 0x ||
> -  mlx5_rxq_get_type(dev, queue_index) !=
> MLX5_RXQ_TYPE_HAIRPIN)) {
> +  mlx5_rxq_get_type(dev, queue_index) !=
> MLX5_RXQ_TYPE_HAIRPIN ||
> +  !!mlx5_rxq_get_hairpin_conf(dev, queue_index)->tx_explicit)) {
>   if ((action_flags & MLX5_FLOW_XCAP_ACTIONS) ==
>   MLX5_FLOW_XCAP_ACTIONS)
>   return rte_flow_error_set(error, ENOTSUP, @@ -
> 6091,7 +6097,10 @@ struct field_modify_info modify_tcp[] = {
>"multiple VLAN actions");
>   }
>   }
> - /* Hairpin flow will add one more TAG action. */
> + /*
> +  * Hairpin flow will add one more TAG action in TX implicit mode.
> +  * In TX explicit mode, there will be no hairpin flow ID.
> +  */
>   if (hairpin > 0)
>   rw_act_num += MLX5_ACT_NUM_SET_TAG;
>   /* extra metadata enabled: one more TAG action will be add. */ diff --
> git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index
> 78e15e7..d328d4a 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -1720,6 +1720,33 @@ enum mlx5_rxq_type
>   return MLX5_RXQ_TYPE_UNDEFINED;
>  }
> 
> +/*
> + * Get a Rx hairpin queue configuration.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + * @param idx
> + *   Rx queue index.
> + *
> + * @return
> + *   Pointer to the configuration if a hairpin RX queue, otherwise NULL.
> + */
> +const struct rte_eth_hairpin_conf *
> +mlx5_rxq_get_hairpin_conf(struct rte_eth_dev *dev, uint16_t idx) {
> + struct mlx5_priv *priv = dev->data->dev_private;
> + struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
> +
> + if (idx < priv->rxqs_n && (*priv->rxqs)[idx]) {
> + rxq_ctrl = container_of((*priv->rxqs)[idx],
> + struct mlx5_rxq_ctrl,
> + rxq);
> + if (rxq_ctrl->type == MLX5_RXQ_TYPE_HAIRPIN)
> + return &rxq_ctrl->hairpin_conf;
> + }
> + return NULL;
> +}
> +
>  /**
>   * Get an indirection table.
>   *
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index
> b50b643..d91ed0f 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -344,6 +344,8 @@ uint32_t mlx5_hrxq_get(struct rte_eth_dev *dev,  int
> mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hxrq_idx);  int
> mlx5_hrxq_verify(struct rte_eth_dev *dev);  enum mlx5_rxq_type
> mlx5_rxq_get_type(struct rte_eth_dev *dev, uint16_t idx);
> +const struct r

Re: [dpdk-dev] [PATCH v2 6/6] net/mlx5: not split hairpin flow in explicit mode

2020-10-26 Thread Slava Ovsiienko
> -Original Message-
> From: Bing Zhao 
> Sent: Thursday, October 22, 2020 17:07
> To: viachesl...@mellanox.com; ma...@mellanox.com
> Cc: dev@dpdk.org; Ori Kam ; Raslan Darawsheh
> 
> Subject: [PATCH v2 6/6] net/mlx5: not split hairpin flow in explicit mode
> 
> In the current implementation, the hairpin flow will be split into two flows
> implicitly if there is some action that only belongs to the TX part. A TX 
> device
> flow will be inserted by the mlx5 PMD itself.
> 
> In hairpin between two ports, the explicit TX flow mode will be the only one 
> to
> be supported. It is not the appropriate behavior to insert a TX flow into
> another device implicitly. The application could create any flow as it likes 
> and
> has full control of the user flows. Hairpin flows will have no difference from
> standard flows and the application can decide how to chain RX and TX flows
> together.
> 
> Even in the single port hairpin, this explicit TX flow mode could also be
> supported.
> 
> When checking if the hairpin needs to be split, just return if the hairpin 
> queue
> is with "tx_explicit" attribute. Then in the following steps for validation 
> and
> translation, the code path will be the same as that for standard flows.
> 
> Signed-off-by: Bing Zhao 
Acked-by: Viacheslav Ovsiienko 

> ---
>  drivers/net/mlx5/mlx5_flow.c | 7 +++
>  1 file changed, 7 insertions(+)
> 


Re: [dpdk-dev] [PATCH v2 4/6] net/mlx5: conditional hairpin auto bind

2020-10-26 Thread Slava Ovsiienko
> -Original Message-
> From: Bing Zhao 
> Sent: Thursday, October 22, 2020 17:07
> To: viachesl...@mellanox.com; ma...@mellanox.com
> Cc: dev@dpdk.org; Ori Kam ; Raslan Darawsheh
> 
> Subject: [PATCH v2 4/6] net/mlx5: conditional hairpin auto bind
> 
> In single port hairpin mode, after the queues are configured during start up.
> The binding process will be enabled automatically in the port start phase and
> the default control flow for egress will be created.
> 
> When switching to two ports hairpin mode, the auto binding process should be
> skipped if there is no TX queue with the peer RX queue on the same device,
> and it should be skipped also if the queues are configured with manual bind
> attribute.
> 
> If the explicit TX flow rule mode is configured or hairpin is between two 
> ports,
> the default control flows for TX queues should not be created.
> 
> Signed-off-by: Bing Zhao 
Acked-by: Viacheslav Ovsiienko 
> ---
>  drivers/net/mlx5/mlx5_trigger.c | 33 +++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
> 


Re: [dpdk-dev] [PATCH v8] net/iavf: support flex desc metadata extraction

2020-10-26 Thread Olivier Matz
Hi,

On Wed, Oct 14, 2020 at 01:31:39PM +0100, Ferruh Yigit wrote:
> On 10/13/2020 9:17 AM, Jeff Guo wrote:
> > Enable metadata extraction for flexible descriptors in AVF, that would
> > allow network function directly get metadata without additional parsing
> > which would reduce the CPU cost for VFs. The enabling metadata
> > extractions involve the metadata of VLAN/IPv4/IPv6/IPv6-FLOW/TCP/MPLS
> > flexible descriptors, and the VF could negotiate the capability of
> > the flexible descriptor with PF and correspondingly configure the
> > specific offload at receiving queues.
> > 
> > Signed-off-by: Jeff Guo 
> > Acked-by: Haiyue Wang 

[...]

> > +EXPERIMENTAL {
> > +global:
> > +
> > +# added in 20.11
> > +rte_net_iavf_dynfield_proto_xtr_metadata_offs;
> > +rte_net_iavf_dynflag_proto_xtr_vlan_mask;
> > +rte_net_iavf_dynflag_proto_xtr_ipv4_mask;
> > +rte_net_iavf_dynflag_proto_xtr_ipv6_mask;
> > +rte_net_iavf_dynflag_proto_xtr_ipv6_flow_mask;
> > +rte_net_iavf_dynflag_proto_xtr_tcp_mask;
> > +rte_net_iavf_dynflag_proto_xtr_ip_offset_mask;
> 
> As a namespace previously "rte_pmd_xxx" was used for PMD specific APIs, can
> you please switch to that?
> 'rte_net_' is used by the 'librte_net' library.
> 
> Above list is the dynfield values, what is the correct usage for dynfields,
> 1- Put dynfileds names in to the header, and application does a lookup
> ('rte_mbuf_dynfield_lookup()') to get the dynfield values.
> or
> 2- Expose dynfield values to be accessed directly from application, as done 
> above.
> 
> @Oliver, can you please support.
> 
> I can see (1) has advantage of portability if more than one PMD supports
> same dynfield names, but that sees not a case for above ones.

If I understand the question correctly, this is the same that was
discussed here:

  http://inbox.dpdk.org/dev/20191030165626.w3flq5wdpitpsv2v@platinum/

To me, exporting the variables containing the dynfield offsets is easier
to use: we don't need to have additional private variables to store them
in each API users (usually one static variable per file, which can be
heavy).

Olivier


Re: [dpdk-dev] [PATCH 10/15] test/distributor: switch sequence to dynamic mbuf field

2020-10-26 Thread Lukasz Wojciechowski
Changes in distributor tests look good.
All tests pass

W dniu 26.10.2020 o 06:21, Thomas Monjalon pisze:
> The test used the deprecated mbuf field udata64.
> It is moved to a dynamic field in order to allow removal of udata64.
>
> Signed-off-by: Thomas Monjalon 
Acked-by: Lukasz Wojciechowski 
Tested-by: Lukasz Wojciechowski 

> ---
>   app/test/test_distributor.c | 27 ++-
>   1 file changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/app/test/test_distributor.c b/app/test/test_distributor.c
> index eb889b91d1..074a06f4a2 100644
> --- a/app/test/test_distributor.c
> +++ b/app/test/test_distributor.c
> @@ -10,6 +10,7 @@
>   #include 
>   #include 
>   #include 
> +#include 
>   #include 
>   #include 
>   
> @@ -17,6 +18,10 @@
>   #define BURST 32
>   #define BIG_BATCH 1024
>   
> +static int seq_dynfield_offset;
> +#define SEQ_FIELD(mbuf) \
> + (*RTE_MBUF_DYNFIELD(mbuf, seq_dynfield_offset, uint32_t *))
> +
>   struct worker_params {
>   char name[64];
>   struct rte_distributor *dist;
> @@ -578,7 +583,7 @@ handle_and_mark_work(void *arg)
>   __atomic_fetch_add(&worker_stats[id].handled_packets, num,
>   __ATOMIC_RELAXED);
>   for (i = 0; i < num; i++)
> - buf[i]->udata64 += id + 1;
> + SEQ_FIELD(buf[i]) += id + 1;
>   num = rte_distributor_get_pkt(db, id,
>   buf, buf, num);
>   }
> @@ -631,10 +636,10 @@ sanity_mark_test(struct worker_params *wp, struct 
> rte_mempool *p)
>   << shift;
>   }
>   /* Assign a sequence number to each packet. The sequence is shifted,
> -  * so that lower bits of the udate64 will hold mark from worker.
> +  * so that lower bits will hold mark from worker.
>*/
>   for (i = 0; i < buf_count; i++)
> - bufs[i]->udata64 = i << seq_shift;
> + SEQ_FIELD(bufs[i]) = i << seq_shift;
>   
>   count = 0;
>   for (i = 0; i < buf_count/burst; i++) {
> @@ -660,8 +665,8 @@ sanity_mark_test(struct worker_params *wp, struct 
> rte_mempool *p)
>   
>   /* Sort returned packets by sent order (sequence numbers). */
>   for (i = 0; i < buf_count; i++) {
> - seq = returns[i]->udata64 >> seq_shift;
> - id = returns[i]->udata64 - (seq << seq_shift);
> + seq = SEQ_FIELD(returns[i]) >> seq_shift;
> + id = SEQ_FIELD(returns[i]) - (seq << seq_shift);
>   sorted[seq] = id;
>   }
>   
> @@ -805,6 +810,18 @@ test_distributor(void)
>   static struct rte_mempool *p;
>   int i;
>   
> + static const struct rte_mbuf_dynfield seq_dynfield_desc = {
> + .name = "test_distributor_dynfield_seq",
> + .size = sizeof(uint32_t),
> + .align = __alignof__(uint32_t),
> + };
> + seq_dynfield_offset =
> + rte_mbuf_dynfield_register(&seq_dynfield_desc);
> + if (seq_dynfield_offset < 0) {
> + printf("Error registering mbuf field\n");
> + return TEST_FAILED;
> + }
> +
>   if (rte_lcore_count() < 2) {
>   printf("Not enough cores for distributor_autotest, expecting at 
> least 2\n");
>   return TEST_SKIPPED;

-- 
Lukasz Wojciechowski
Principal Software Engineer

Samsung R&D Institute Poland
Samsung Electronics
Office +48 22 377 88 25
l.wojciec...@partner.samsung.com



[dpdk-dev] [PATCH v3 3/6] net/mlx5: register multiple pool for Rx queue

2020-10-26 Thread Viacheslav Ovsiienko
The split feature for receiving packets was added to the mlx5
PMD, now Rx queue can receive the data to the buffers belonging
to the different pools and the memory of all the involved pool
must be registered for DMA operations in order to allow hardware
to store the data.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5_mr.c  |  3 +++
 drivers/net/mlx5/mlx5_trigger.c | 20 
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index dbcf0aa..c308ecc 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -536,6 +536,9 @@ struct mr_update_mp_data {
.ret = 0,
};
 
+   DRV_LOG(DEBUG, "Port %u Rx queue registering mp %s "
+  "having %u chunks.", dev->data->port_id,
+  mp->name, mp->nb_mem_chunks);
rte_mempool_mem_iter(mp, mlx5_mr_update_mp_cb, &data);
if (data.ret < 0 && rte_errno == ENXIO) {
/* Mempool may have externally allocated memory. */
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7735f02..19f2d66 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -145,18 +145,22 @@
dev->data->port_id, priv->sh->device_attr.max_sge);
for (i = 0; i != priv->rxqs_n; ++i) {
struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
-   struct rte_mempool *mp;
 
if (!rxq_ctrl)
continue;
if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
-   /* Pre-register Rx mempool. */
-   mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
-rxq_ctrl->rxq.mprq_mp : rxq_ctrl->rxq.mp;
-   DRV_LOG(DEBUG, "Port %u Rx queue %u registering mp %s"
-   " having %u chunks.", dev->data->port_id,
-   rxq_ctrl->rxq.idx, mp->name, mp->nb_mem_chunks);
-   mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
+   /* Pre-register Rx mempools. */
+   if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
+   mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
+ rxq_ctrl->rxq.mprq_mp);
+   } else {
+   uint32_t s;
+
+   for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
+   mlx5_mr_update_mp
+   (dev, &rxq_ctrl->rxq.mr_ctrl,
+   rxq_ctrl->rxq.rxseg[s].mp);
+   }
ret = rxq_alloc_elts(rxq_ctrl);
if (ret)
goto error;
-- 
1.8.3.1



[dpdk-dev] [PATCH v3 0/6] net/mlx5: add Rx buffer split support

2020-10-26 Thread Viacheslav Ovsiienko
This patch adds to the PMD the functionality for the receiving
buffer split feature [1]

[1] http://patches.dpdk.org/patch/81154/

Signed-off-by: Viacheslav Ovsiienko 

---
v1: http://patches.dpdk.org/patch/81808/

v2: http://patches.dpdk.org/patch/81923/
- typos
- documentation is updated

v3: - extra parameter checks in PMD rx_queue_setup removed
- minor optimizations in PMD

Viacheslav Ovsiienko (6):
  net/mlx5: add extended Rx queue setup routine
  net/mlx5: configure Rx queue to support split
  net/mlx5: register multiple pool for Rx queue
  net/mlx5: update Rx datapath to support split
  net/mlx5: report Rx segmentation capabilities
  doc: add buffer split feature limitation to mlx5 guide

 doc/guides/nics/mlx5.rst|   6 +-
 drivers/net/mlx5/mlx5.h |   3 +
 drivers/net/mlx5/mlx5_ethdev.c  |   4 ++
 drivers/net/mlx5/mlx5_mr.c  |   3 +
 drivers/net/mlx5/mlx5_rxq.c | 144 +---
 drivers/net/mlx5/mlx5_rxtx.c|   3 +-
 drivers/net/mlx5/mlx5_rxtx.h|  13 +++-
 drivers/net/mlx5/mlx5_trigger.c |  20 +++---
 8 files changed, 161 insertions(+), 35 deletions(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH v3 1/6] net/mlx5: add extended Rx queue setup routine

2020-10-26 Thread Viacheslav Ovsiienko
The routine to provide Rx queue setup with specifying
extended receiving buffer description is added.
It allows application to specify desired segment
lengths, data position offsets in the buffer
and dedicated memory pool for each segment.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5.h  |  3 +++
 drivers/net/mlx5/mlx5_rxq.c  | 39 ++-
 drivers/net/mlx5/mlx5_rxtx.h | 13 -
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c9d5d71..03c4128 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -164,6 +164,9 @@ struct mlx5_stats_ctrl {
 /* Maximal size of aggregated LRO packet. */
 #define MLX5_MAX_LRO_SIZE (UINT8_MAX * MLX5_LRO_SEG_CHUNK_SIZE)
 
+/* Maximal number of segments to split. */
+#define MLX5_MAX_RXQ_NSEG (1u << MLX5_MAX_LOG_RQ_SEGS)
+
 /* LRO configurations structure. */
 struct mlx5_lro_config {
uint32_t supported:1; /* Whether LRO is supported. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index e1783ba..ffb83de 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -731,12 +731,40 @@
struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+   struct rte_eth_rxseg_split *rx_seg =
+   (struct rte_eth_rxseg_split *)conf->rx_seg;
+   struct rte_eth_rxseg_split rx_single = {.mp = mp};
+   uint16_t n_seg = conf->rx_nseg;
int res;
 
+   if (mp) {
+   /*
+* The parameters should be checked on rte_eth_dev layer.
+* If mp is specified it means the compatible configuration
+* without buffer split feature tuning.
+*/
+   rx_seg = &rx_single;
+   n_seg = 1;
+   }
+   if (n_seg > 1) {
+   uint64_t offloads = conf->offloads |
+   dev->data->dev_conf.rxmode.offloads;
+
+   /* The offloads should be checked on rte_eth_dev layer. */
+   MLX5_ASSERT(offloads & DEV_RX_OFFLOAD_SCATTER);
+   if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) {
+   DRV_LOG(ERR, "port %u queue index %u split "
+"offload not configured",
+dev->data->port_id, idx);
+   rte_errno = ENOSPC;
+   return -rte_errno;
+   }
+   MLX5_ASSERT(n_seg < MLX5_MAX_RXQ_NSEG);
+   }
res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
if (res)
return res;
-   rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
+   rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, rx_seg, n_seg);
if (!rxq_ctrl) {
DRV_LOG(ERR, "port %u unable to allocate queue index %u",
dev->data->port_id, idx);
@@ -1329,11 +1357,11 @@
 struct mlx5_rxq_ctrl *
 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 unsigned int socket, const struct rte_eth_rxconf *conf,
-struct rte_mempool *mp)
+const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg)
 {
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *tmpl;
-   unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+   unsigned int mb_len = rte_pktmbuf_data_room_size(rx_seg[0].mp);
unsigned int mprq_stride_nums;
unsigned int mprq_stride_size;
unsigned int mprq_stride_cap;
@@ -1347,7 +1375,8 @@ struct mlx5_rxq_ctrl *
uint64_t offloads = conf->offloads |
   dev->data->dev_conf.rxmode.offloads;
unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO);
-   const int mprq_en = mlx5_check_mprq_support(dev) > 0;
+   const int mprq_en = mlx5_check_mprq_support(dev) > 0 && n_seg == 1 &&
+   !rx_seg[0].offset && !rx_seg[0].length;
unsigned int max_rx_pkt_len = lro_on_queue ?
dev->data->dev_conf.rxmode.max_lro_pkt_size :
dev->data->dev_conf.rxmode.max_rx_pkt_len;
@@ -1532,7 +1561,7 @@ struct mlx5_rxq_ctrl *
(!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
tmpl->rxq.port_id = dev->data->port_id;
tmpl->priv = priv;
-   tmpl->rxq.mp = mp;
+   tmpl->rxq.mp = rx_seg[0].mp;
tmpl->rxq.elts_n = log2above(desc);
tmpl->rxq.rq_repl_thresh =
MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index b243b6f..f3af9bd 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -94,6 +94,13 @@ enum mlx5_rxq_err_state {

[dpdk-dev] [PATCH v3 4/6] net/mlx5: update Rx datapath to support split

2020-10-26 Thread Viacheslav Ovsiienko
Only the regular rx_burst routine is updated to support split,
because the vectorized ones does not support scatter and MPRQ
does not support split at all.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5_rxq.c  | 11 +--
 drivers/net/mlx5/mlx5_rxtx.c |  3 ++-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 17fd89e..a19ca7c 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -210,9 +210,10 @@
 
/* Iterate on segments. */
for (i = 0; (i != elts_n); ++i) {
+   struct mlx5_eth_rxseg *seg = &rxq_ctrl->rxq.rxseg[i % sges_n];
struct rte_mbuf *buf;
 
-   buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
+   buf = rte_pktmbuf_alloc(seg->mp);
if (buf == NULL) {
DRV_LOG(ERR, "port %u empty mbuf pool",
PORT_ID(rxq_ctrl->priv));
@@ -225,12 +226,10 @@
MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0);
MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0);
MLX5_ASSERT(!buf->next);
-   /* Only the first segment keeps headroom. */
-   if (i % sges_n)
-   SET_DATA_OFF(buf, 0);
+   SET_DATA_OFF(buf, seg->offset);
PORT(buf) = rxq_ctrl->rxq.port_id;
-   DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
-   PKT_LEN(buf) = DATA_LEN(buf);
+   DATA_LEN(buf) = seg->length;
+   PKT_LEN(buf) = seg->length;
NB_SEGS(buf) = 1;
(*rxq_ctrl->rxq.elts)[i] = buf;
}
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b530ff4..dd84249 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1334,7 +1334,8 @@ enum mlx5_txcmp_code {
rte_prefetch0(seg);
rte_prefetch0(cqe);
rte_prefetch0(wqe);
-   rep = rte_mbuf_raw_alloc(rxq->mp);
+   /* Allocate the buf from the same pool. */
+   rep = rte_mbuf_raw_alloc(seg->pool);
if (unlikely(rep == NULL)) {
++rxq->stats.rx_nombuf;
if (!pkt) {
-- 
1.8.3.1



[dpdk-dev] [PATCH v3 6/6] doc: add buffer split feature limitation to mlx5 guide

2020-10-26 Thread Viacheslav Ovsiienko
The buffer split feature is mentioned in the mlx5 PMD
documentation, and the limitation description is added
as well.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 doc/guides/nics/mlx5.rst | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 1a8808e..4621a5e 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -64,7 +64,8 @@ Features
 
 - Multi arch support: x86_64, POWER8, ARMv8, i686.
 - Multiple TX and RX queues.
-- Support for scattered TX and RX frames.
+- Support for scattered TX frames.
+- Advanced support for scattered Rx frames with tunable buffer attributes.
 - IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6 RSS on any number of queues.
 - RSS using different combinations of fields: L3 only, L4 only or both,
   and source only, destination only or both.
@@ -192,6 +193,9 @@ Limitations
the device. In case of ungraceful program termination, some entries may
remain present and should be removed manually by other means.
 
+- Buffer split offload is supported with regular Rx burst routine only,
+  no MPRQ feature or vectorized code can be engaged.
+
 - When Multi-Packet Rx queue is configured (``mprq_en``), a Rx packet can be
   externally attached to a user-provided mbuf with having EXT_ATTACHED_MBUF in
   ol_flags. As the mempool for the external buffer is managed by PMD, all the
-- 
1.8.3.1



[dpdk-dev] [PATCH v3 5/6] net/mlx5: report Rx segmentation capabilities

2020-10-26 Thread Viacheslav Ovsiienko
Add rte_eth_dev_info->rx_seg_capa parameters:
  - receiving to multiple pools is supported
  - buffer offsets are supported
  - no offset alignment requirement
  - reports the maximal number of segments
  - reports the buffer split offload flag

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5_ethdev.c | 4 
 drivers/net/mlx5/mlx5_rxq.c| 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 7631f64..9017184 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -306,6 +306,10 @@
info->max_tx_queues = max;
info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES;
info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
+   info->rx_seg_capa.max_nseg = MLX5_MAX_RXQ_NSEG;
+   info->rx_seg_capa.multi_pools = 1;
+   info->rx_seg_capa.offset_allowed = 1;
+   info->rx_seg_capa.offset_align_log2 = 0;
info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
 info->rx_queue_offload_capa);
info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index a19ca7c..88e8911 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -389,6 +389,7 @@
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_dev_config *config = &priv->config;
uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER |
+RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT |
 DEV_RX_OFFLOAD_TIMESTAMP |
 DEV_RX_OFFLOAD_JUMBO_FRAME |
 DEV_RX_OFFLOAD_RSS_HASH);
-- 
1.8.3.1



[dpdk-dev] [PATCH v3 2/6] net/mlx5: configure Rx queue to support split

2020-10-26 Thread Viacheslav Ovsiienko
The scatter-gather elements should be configured
accordingly to support the buffer split feature.
The application provides the desired settings for
the segments at the beginning of the packets and
PMD pads the buffer chain (if needed) with attributes
of last specified segment to accommodate the packet
of maximal length.

Some limitations are implied. The MPRQ
feature should be disengaged if split is requested,
since MPRQ neither supports pushing data to the
dedicated pools nor follows the flexible buffer sizes.
The vectorized rx_burst routines do not support
scattering (they are extremely simplified
and work over a single segment only) and cannot
handle split either.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5_rxq.c | 93 ++---
 1 file changed, 80 insertions(+), 13 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index ffb83de..17fd89e 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1384,22 +1384,89 @@ struct mlx5_rxq_ctrl *
RTE_PKTMBUF_HEADROOM;
unsigned int max_lro_size = 0;
unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
+   const struct rte_eth_rxseg_split *qs_seg = rx_seg;
+   unsigned int tail_len;
 
-   if (non_scatter_min_mbuf_size > mb_len && !(offloads &
-   DEV_RX_OFFLOAD_SCATTER)) {
+   tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
+  desc_n * sizeof(struct rte_mbuf *), 0, socket);
+   if (!tmpl) {
+   rte_errno = ENOMEM;
+   return NULL;
+   }
+   MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG);
+   /*
+* Build the array of actual buffer offsets and lengths.
+* Pad with the buffers from the last memory pool if
+* needed to handle max size packets, replace zero length
+* with the buffer length from the pool.
+*/
+   tail_len = max_rx_pkt_len;
+   do {
+   struct mlx5_eth_rxseg *hw_seg =
+   &tmpl->rxq.rxseg[tmpl->rxq.rxseg_n];
+   uint32_t buf_len, offset, seg_len;
+
+   /*
+* For the buffers beyond descriptions offset is zero,
+* the first buffer contains head room.
+*/
+   buf_len = rte_pktmbuf_data_room_size(qs_seg->mp);
+   offset = (tmpl->rxq.rxseg_n >= n_seg ? 0 : qs_seg->offset) +
+(tmpl->rxq.rxseg_n ? 0 : RTE_PKTMBUF_HEADROOM);
+   /*
+* For the buffers beyond descriptions the length is
+* pool buffer length, zero lengths are replaced with
+* pool buffer length either.
+*/
+   seg_len = tmpl->rxq.rxseg_n >= n_seg ? buf_len :
+  qs_seg->length ?
+  qs_seg->length :
+  (buf_len - offset);
+   /* Check is done in long int, now overflows. */
+   if (buf_len < seg_len + offset) {
+   DRV_LOG(ERR, "port %u Rx queue %u: Split offset/length "
+"%u/%u can't be satisfied",
+dev->data->port_id, idx,
+qs_seg->length, qs_seg->offset);
+   rte_errno = EINVAL;
+   goto error;
+   }
+   if (seg_len > tail_len)
+   seg_len = buf_len - offset;
+   if (++tmpl->rxq.rxseg_n > MLX5_MAX_RXQ_NSEG) {
+   DRV_LOG(ERR,
+   "port %u too many SGEs (%u) needed to handle"
+   " requested maximum packet size %u, the maximum"
+   " supported are %u", dev->data->port_id,
+   tmpl->rxq.rxseg_n, max_rx_pkt_len,
+   MLX5_MAX_RXQ_NSEG);
+   rte_errno = ENOTSUP;
+   goto error;
+   }
+   /* Build the actual scattering element in the queue object. */
+   hw_seg->mp = qs_seg->mp;
+   MLX5_ASSERT(offset <= UINT16_MAX);
+   MLX5_ASSERT(seg_len <= UINT16_MAX);
+   hw_seg->offset = (uint16_t)offset;
+   hw_seg->length = (uint16_t)seg_len;
+   /*
+* Advance the segment descriptor, the padding is the based
+* on the attributes of the last descriptor.
+*/
+   if (tmpl->rxq.rxseg_n < n_seg)
+   qs_seg++;
+   tail_len -= RTE_MIN(tail_len, seg_len);
+   

Re: [dpdk-dev] [PATCH v1] net/ice: refactor dynamic mbuf in data extraction

2020-10-26 Thread Olivier Matz
Hi Haiyue,

On Sun, Oct 25, 2020 at 03:13:52PM +0800, Haiyue Wang wrote:
> Current dynamic mbuf design is that the driver will register the needed
> field and flags at the device probing time, this will make iavf PMD use
> different names to register the dynamic mbuf field and flags, but both
> of them use the exactly same protocol extraction metadata.
> 
> This will run out of the limited dynamic mbuf resource, meanwhile, the
> application has to handle the dynamic mbuf separately.
> 
> For making things simple and consistent, refactor dynamic mbuf in data
> extraction handling: the PMD just lookups the same name at the queue
> setup time after the application registers it.
> 
> In other words, make the dynamic mbuf string name as API, not the data
> object which is defined in each PMD.

In case the dynamic mbuf field is shared by several PMDs, it seems to
be indeed a better solution.

Currently, the "union rte_pmd_proto_xtr_metadata" is still defined in
rte_pmd_ice.h. Will it be the same for iavf, and will it be factorized
somewhere? However I don't know where could be a good place.

There is already lib/librte_mbuf/rte_mbuf_dyn.h which is the place to
centralize the name and description of dynamic fields/flags used in
libraries. But I think neither structure definitions nor PMD-specific
flags should go there too. I'd prefer to have them in
drivers/net/, but I'm not sure it is possible.

Also, it is difficult from the patch to see the impact it has on an
application that was using these metadata. Should we have an example
of use?

Thanks,
Olivier


> Signed-off-by: Haiyue Wang 
> ---
>  drivers/net/ice/ice_ethdev.c  | 104 ++--
>  drivers/net/ice/ice_ethdev.h  |   1 +
>  drivers/net/ice/ice_rxtx.c|  49 
>  drivers/net/ice/ice_rxtx.h|   1 +
>  drivers/net/ice/rte_pmd_ice.h | 219 +-
>  drivers/net/ice/version.map   |  13 --
>  6 files changed, 68 insertions(+), 319 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
> index 51b99c6506..ec27089cfa 100644
> --- a/drivers/net/ice/ice_ethdev.c
> +++ b/drivers/net/ice/ice_ethdev.c
> @@ -32,42 +32,6 @@ static const char * const ice_valid_args[] = {
>   NULL
>  };
>  
> -static const struct rte_mbuf_dynfield ice_proto_xtr_metadata_param = {
> - .name = "ice_dynfield_proto_xtr_metadata",
> - .size = sizeof(uint32_t),
> - .align = __alignof__(uint32_t),
> - .flags = 0,
> -};
> -
> -struct proto_xtr_ol_flag {
> - const struct rte_mbuf_dynflag param;
> - uint64_t *ol_flag;
> - bool required;
> -};
> -
> -static bool ice_proto_xtr_hw_support[PROTO_XTR_MAX];
> -
> -static struct proto_xtr_ol_flag ice_proto_xtr_ol_flag_params[] = {
> - [PROTO_XTR_VLAN] = {
> - .param = { .name = "ice_dynflag_proto_xtr_vlan" },
> - .ol_flag = &rte_net_ice_dynflag_proto_xtr_vlan_mask },
> - [PROTO_XTR_IPV4] = {
> - .param = { .name = "ice_dynflag_proto_xtr_ipv4" },
> - .ol_flag = &rte_net_ice_dynflag_proto_xtr_ipv4_mask },
> - [PROTO_XTR_IPV6] = {
> - .param = { .name = "ice_dynflag_proto_xtr_ipv6" },
> - .ol_flag = &rte_net_ice_dynflag_proto_xtr_ipv6_mask },
> - [PROTO_XTR_IPV6_FLOW] = {
> - .param = { .name = "ice_dynflag_proto_xtr_ipv6_flow" },
> - .ol_flag = &rte_net_ice_dynflag_proto_xtr_ipv6_flow_mask },
> - [PROTO_XTR_TCP] = {
> - .param = { .name = "ice_dynflag_proto_xtr_tcp" },
> - .ol_flag = &rte_net_ice_dynflag_proto_xtr_tcp_mask },
> - [PROTO_XTR_IP_OFFSET] = {
> - .param = { .name = "ice_dynflag_proto_xtr_ip_offset" },
> - .ol_flag = &rte_net_ice_dynflag_proto_xtr_ip_offset_mask },
> -};
> -
>  #define ICE_DFLT_OUTER_TAG_TYPE ICE_AQ_VSI_OUTER_TAG_VLAN_9100
>  
>  #define ICE_OS_DEFAULT_PKG_NAME  "ICE OS Default Package"
> @@ -542,7 +506,7 @@ handle_proto_xtr_arg(__rte_unused const char *key, const 
> char *value,
>  }
>  
>  static void
> -ice_check_proto_xtr_support(struct ice_hw *hw)
> +ice_check_proto_xtr_support(struct ice_pf *pf, struct ice_hw *hw)
>  {
>  #define FLX_REG(val, fld, idx) \
>   (((val) & GLFLXP_RXDID_FLX_WRD_##idx##_##fld##_M) >> \
> @@ -587,7 +551,7 @@ ice_check_proto_xtr_support(struct ice_hw *hw)
>  
>   if (FLX_REG(v, PROT_MDID, 4) == xtr_sets[i].protid_0 &&
>   FLX_REG(v, RXDID_OPCODE, 4) == xtr_sets[i].opcode)
> - ice_proto_xtr_hw_support[i] = true;
> + pf->hw_proto_xtr_ena[i] = 1;
>   }
>  
>   if (xtr_sets[i].protid_1 != ICE_PROT_ID_INVAL) {
> @@ -595,7 +559,7 @@ ice_check_proto_xtr_support(struct ice_hw *hw)
>  
>   if (FLX_REG(v, PROT_MDID, 5) == xtr_sets[i].protid_1 &&
>   FLX_REG(v, RXDID_OPCODE, 5) == xtr_sets[i].opcode)
> - ice_proto_xtr_hw_s

[dpdk-dev] [PATCH v1] vhost: fix gpa to hpa conversion

2020-10-26 Thread Patrick Fu
gpa_to_hpa() function almost always fails due to the wrong setup of
the b-tree search key. Since there is already a similar function
gpa_to_first_hpa() available in the vhost library, instead of fixing the
issue in its original logic, the gpa_to_hpa() function is rewritten to be
a wrapper of gpa_to_first_hpa() to avoid code redundancy.

Fixes: e246896178e6 ("vhost: get guest/host physical address mappings")
Fixes: faa9867c4da2 ("vhost: use binary search in address conversion")

Signed-off-by: Patrick Fu 
---
 lib/librte_vhost/vhost.h | 43 ++--
 1 file changed, 11 insertions(+), 32 deletions(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 75d79f80a..361c9f79b 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -563,38 +563,6 @@ static __rte_always_inline int guest_page_addrcmp(const 
void *p1,
return 0;
 }
 
-/* Convert guest physical address to host physical address */
-static __rte_always_inline rte_iova_t
-gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
-{
-   uint32_t i;
-   struct guest_page *page;
-   struct guest_page key;
-
-   if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
-   key.guest_phys_addr = gpa;
-   page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
-  sizeof(struct guest_page), guest_page_addrcmp);
-   if (page) {
-   if (gpa + size < page->guest_phys_addr + page->size)
-   return gpa - page->guest_phys_addr +
-   page->host_phys_addr;
-   }
-   } else {
-   for (i = 0; i < dev->nr_guest_pages; i++) {
-   page = &dev->guest_pages[i];
-
-   if (gpa >= page->guest_phys_addr &&
-   gpa + size < page->guest_phys_addr +
-   page->size)
-   return gpa - page->guest_phys_addr +
-  page->host_phys_addr;
-   }
-   }
-
-   return 0;
-}
-
 static __rte_always_inline rte_iova_t
 gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
uint64_t gpa_size, uint64_t *hpa_size)
@@ -645,6 +613,17 @@ gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
return 0;
 }
 
+/* Convert guest physical address to host physical address */
+static __rte_always_inline rte_iova_t
+gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
+{
+   rte_iova_t hpa;
+   uint64_t hpa_size;
+
+   hpa = gpa_to_first_hpa(dev, gpa, size, &hpa_size);
+   return hpa_size == size ? hpa : 0;
+}
+
 static __rte_always_inline uint64_t
 hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len)
 {
-- 
2.18.4



Re: [dpdk-dev] [PATCH] app/testpmd: set default RSS key as null

2020-10-26 Thread Ferruh Yigit

On 10/21/2020 11:07 AM, Lijun Ou wrote:

From: Ophir Munk 

When creating an RSS rule without specifying a key (see [1]) it is
expected that the device will use the default key.
A NULL key is used to indicate to a PMD it should use
its default key, however testpmd assigns a non-NULL dummy key
(see [2]) instead.
This does not enable testing any PMD behavior when the RSS key is not
specified. This commit fixes this limitation by setting key to NULL.
Also, it fixes the Scenario [3] that enable default RSS action by
setting key=NULL and key_len!=0.
[1]
RSS rule example without specifying a key:
flow create 0 ingress  / end actions rss queues 0 1 end / end
[2]
Testpmd default key assignment:
.key= "testpmd's default RSS hash key, "
"override it for better balancing"
[3]
flow create 0  actions rss queues 0 1 end key_len 40 / end

fixes refer to the link: https://patches.dpdk.org/patch/80898/

Signed-off-by: Ophir Munk 
Signed-off-by: Ferruh Yigit 
Signed-off-by: Lijun Ou 


Applied to dpdk-next-net/main, thanks.


Updated the commit log as below:

Author: Lijun Ou 
Date:   Wed Oct 21 18:07:10 2020 +0800

app/testpmd: fix RSS key for flow API RSS rule

When a flow API RSS rule is issued in testpmd, device RSS key is changed
unexpectedly, device RSS key is changed to the testpmd default RSS key.

Consider the following usage with testpmd:
1. first, startup testpmd:
 testpmd> show port 0 rss-hash key
 RSS functions: all ipv4-frag ipv4-other ipv6-frag ipv6-other ip
 RSS key: 6D5A56DA255B0EC24167253D43A38FB0D0CA2BCBAE7B30B477CB2DA38030F
  20C6A42B73BBEAC01FA
2. create a rss rule
 testpmd> flow create 0 ingress pattern eth / ipv4 / udp / end \
  actions rss types ipv4-udp end queues end / end

3. show rss-hash key
 testpmd> show port 0 rss-hash key
 RSS functions: all ipv4-udp udp
 RSS key: 74657374706D6427732064656661756C74205253532068617368206B65792
  C206F76657272696465

This is because testpmd always sends a key with the RSS rule,
if user provides a key as part of the rule that key is used, if user
doesn't provide a key, testpmd default key is sent to the PMDs, which is
causing device programmed RSS key to be changed.

There was a previous attempt to fix the same issue [1], but it has been
reverted back [2] because of the crash when 'key_len' is provided
without 'key'.

This patch follows the same approach with the initial fix [1] but also
addresses the crash.

After change, testpmd RSS key is 'NULL' by default, if user provides a
key as part of rule it is used, if not no key is sent to the PMDs at all

[1]
Commit a4391f8bae85 ("app/testpmd: set default RSS key as null")

[2]
Commit f3698c3d09a6 ("app/testpmd: revert setting default RSS")

Fixes: d0ad8648b1c5 ("app/testpmd: fix RSS flow action configuration")
Cc: sta...@dpdk.org

Signed-off-by: Lijun Ou 
Signed-off-by: Ophir Munk 
Signed-off-by: Ferruh Yigit 



Re: [dpdk-dev] [PATCH v3] mbuf: fix dynamic flags lookup from secondary process

2020-10-26 Thread Olivier Matz
Hi Stephen,

On Fri, Oct 23, 2020 at 05:43:31PM -0700, Stephen Hemminger wrote:
> The dynamic flag management is broken if rte_mbuf_dynflag_lookup()
> is done in a secondary process because the local pointer to
> the memzone is not ever initialized.
> 
> Fix it by using the same checks as dynfield_register().
> I.e if shared memory zone has not been looked up already,
> then discover it.
> 
> Fixes: 4958ca3a443a ("mbuf: support dynamic fields and flags")
> Cc: olivier.m...@6wind.com
> Signed-off-by: Stephen Hemminger 
> ---
> 
> v3 - change title, fix one extra whitespace 
> 
>  lib/librte_mbuf/rte_mbuf_dyn.c | 20 
>  1 file changed, 8 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c
> index 538a43f6959f..554ec5a1ca4f 100644
> --- a/lib/librte_mbuf/rte_mbuf_dyn.c
> +++ b/lib/librte_mbuf/rte_mbuf_dyn.c
> @@ -185,13 +185,11 @@ rte_mbuf_dynfield_lookup(const char *name, struct 
> rte_mbuf_dynfield *params)
>  {
>   struct mbuf_dynfield_elt *mbuf_dynfield;
>  
> - if (shm == NULL) {
> - rte_errno = ENOENT;
> - return -1;
> - }
> -
>   rte_mcfg_tailq_read_lock();
> - mbuf_dynfield = __mbuf_dynfield_lookup(name);
> + if (shm == NULL && init_shared_mem() < 0)
> + mbuf_dynfield = NULL;
> + else
> + mbuf_dynfield = __mbuf_dynfield_lookup(name);
>   rte_mcfg_tailq_read_unlock();
>  
>   if (mbuf_dynfield == NULL) {
>   rte_errno = ENOENT;
>   return -1;

There is still a small corner case here: on a primary process,
init_shared_mem() can return -1 in case rte_memzone_reserve_aligned()
returns a NULL memzone. In this situation, rte_errno is set by the
memzone layer by overriden to ENOENT.

Maybe something like this is better, what do you think?

@@ -172,7 +172,7 @@ __mbuf_dynfield_lookup(const char *name)
break;
}
 
-   if (te == NULL) {
+   if (te == NULL || mbuf_dynfield == NULL) {
rte_errno = ENOENT;
return NULL;
}
@@ -185,19 +185,15 @@ rte_mbuf_dynfield_lookup(const char *name, struct 
rte_mbuf_dynfield *params)
 {
struct mbuf_dynfield_elt *mbuf_dynfield;
 
-   if (shm == NULL) {
-   rte_errno = ENOENT;
-   return -1;
-   }
-
rte_mcfg_tailq_read_lock();
-   mbuf_dynfield = __mbuf_dynfield_lookup(name);
+   if (shm == NULL && init_shared_mem() < 0)
+   mbuf_dynfield = NULL;
+   else
+   mbuf_dynfield = __mbuf_dynfield_lookup(name);
rte_mcfg_tailq_read_unlock();
 
-   if (mbuf_dynfield == NULL) {
-   rte_errno = ENOENT;
+   if (mbuf_dynfield == NULL)
return -1;
-   }
 
if (params != NULL)
memcpy(params, &mbuf_dynfield->params, sizeof(*params));



Thanks,
Olivier


> @@ -384,13 +382,11 @@ rte_mbuf_dynflag_lookup(const char *name,
>  {
>   struct mbuf_dynflag_elt *mbuf_dynflag;
>  
> - if (shm == NULL) {
> - rte_errno = ENOENT;
> - return -1;
> - }
> -
>   rte_mcfg_tailq_read_lock();
> - mbuf_dynflag = __mbuf_dynflag_lookup(name);
> + if (shm == NULL && init_shared_mem() < 0)
> + mbuf_dynflag = NULL;
> + else
> + mbuf_dynflag = __mbuf_dynflag_lookup(name);
>   rte_mcfg_tailq_read_unlock();
>  
>   if (mbuf_dynflag == NULL) {
> -- 
> 2.27.0
> 


Re: [dpdk-dev] [PATCH 04/15] node: switch IPv4 metadata to dynamic mbuf field

2020-10-26 Thread David Marchand
On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  wrote:
>
> The node_mbuf_priv1 was stored in the deprecated mbuf field udata64.
> It is moved to a dynamic field in order to allow removal of udata64.
>
> Signed-off-by: Thomas Monjalon 
> ---
>  lib/librte_node/ip4_lookup.c   |  7 +++
>  lib/librte_node/ip4_rewrite.c  | 10 ++
>  lib/librte_node/node_private.h | 12 ++--
>  3 files changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c
> index 8835aab9dd..2cc91c0c67 100644
> --- a/lib/librte_node/ip4_lookup.c
> +++ b/lib/librte_node/ip4_lookup.c
> @@ -21,6 +21,8 @@
>
>  #include "node_private.h"
>
> +int node_mbuf_priv1_dynfield_offset;
> +
>  #define IPV4_L3FWD_LPM_MAX_RULES 1024
>  #define IPV4_L3FWD_LPM_NUMBER_TBL8S (1 << 8)
>
> @@ -178,6 +180,11 @@ ip4_lookup_node_init(const struct rte_graph *graph, 
> struct rte_node *node)
> RTE_SET_USED(node);
>
> if (!init_once) {
> +   node_mbuf_priv1_dynfield_offset = rte_mbuf_dynfield_register(
> +   &node_mbuf_priv1_dynfield_desc);
> +   if (node_mbuf_priv1_dynfield_offset < 0)
> +   return -1;

How about return -rte_errno like what is done in setup_lpm()?


> +
> /* Setup LPM tables for all sockets */
> RTE_LCORE_FOREACH(lcore_id)
> {

[snip]

> diff --git a/lib/librte_node/node_private.h b/lib/librte_node/node_private.h
> index ab7941c12b..bafea3704d 100644
> --- a/lib/librte_node/node_private.h
> +++ b/lib/librte_node/node_private.h
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  extern int rte_node_logtype;
>  #define NODE_LOG(level, node_name, ...)  
>   \
> @@ -21,7 +22,6 @@ extern int rte_node_logtype;
>  #define node_dbg(node_name, ...) NODE_LOG(DEBUG, node_name, __VA_ARGS__)
>
>  /**
> - *
>   * Node mbuf private data to store next hop, ttl and checksum.
>   */
>  struct node_mbuf_priv1 {
> @@ -37,6 +37,13 @@ struct node_mbuf_priv1 {
> };
>  };
>
> +static const struct rte_mbuf_dynfield node_mbuf_priv1_dynfield_desc = {
> +   .name = "rte_node_dynfield_priv1",
> +   .size = sizeof(uint64_t),
> +   .align = __alignof__(uint64_t),

s/uint64_t/struct node_mbuf_priv1/g ?


> +};
> +extern int node_mbuf_priv1_dynfield_offset;
> +
>  /**
>   * Node mbuf private area 2.
>   */


-- 
David Marchand



Re: [dpdk-dev] [PATCH 05/15] security: switch metadata to dynamic mbuf field

2020-10-26 Thread David Marchand
On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  wrote:
> diff --git a/lib/librte_security/rte_security_driver.h 
> b/lib/librte_security/rte_security_driver.h
> index 1b561f8528..ba9691b4a0 100644
> --- a/lib/librte_security/rte_security_driver.h
> +++ b/lib/librte_security/rte_security_driver.h
> @@ -17,6 +17,8 @@
>  extern "C" {
>  #endif
>
> +#include 
> +
>  #include "rte_security.h"
>
>  /**
> @@ -89,6 +91,24 @@ typedef int (*security_session_stats_get_t)(void *device,
> struct rte_security_session *sess,
> struct rte_security_stats *stats);
>
> +/* Dynamic mbuf field for device-specific metadata */
> +static const struct rte_mbuf_dynfield rte_security_dynfield_desc = {
> +   .name = RTE_SECURITY_DYNFIELD_NAME,
> +   .size = sizeof(RTE_SECURITY_DYNFIELD_TYPE),
> +   .align = __alignof__(RTE_SECURITY_DYNFIELD_TYPE),
> +};

Should be in rte_security.c?


> +extern int rte_security_dynfield_offset;
> +
> +__rte_experimental
> +int rte_security_dynfield_register(void);
> +
> +static inline RTE_SECURITY_DYNFIELD_TYPE *
> +rte_security_dynfield(struct rte_mbuf *mbuf)
> +{
> +   return RTE_MBUF_DYNFIELD(mbuf,
> +   rte_security_dynfield_offset, RTE_SECURITY_DYNFIELD_TYPE *);
> +}
> +

-- 
David Marchand



Re: [dpdk-dev] [PATCH 08/15] net/bnxt: switch CFA code to dynamic mbuf field

2020-10-26 Thread David Marchand
On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  wrote:
>
> The CFA code from mark was stored in the deprecated mbuf field udata64.
> It is moved to a dynamic field in order to allow removal of udata64.

We convert from a 64 bits (with only upper 32 bits filled) to a 32 bits field.
Worth a comment on the API change for users (if any).


-- 
David Marchand



Re: [dpdk-dev] [PATCH 14/15] examples/rxtx_callbacks: switch to dynamic mbuf field

2020-10-26 Thread David Marchand
On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  wrote:
>
> The example used the deprecated mbuf field udata64.
> It is moved to a dynamic field in order to allow removal of udata64.
>
> Note: RTE_MBUF_DYNFIELD_TIMESTAMP_NAME is an existing mbuf field name.

I am a bit lost.
How is this going to work as the mbuf timestamp field is used in this
example too?


-- 
David Marchand



Re: [dpdk-dev] [PATCH] app/procinfo: clean old build macro

2020-10-26 Thread David Marchand
On Fri, Oct 23, 2020 at 9:45 AM David Marchand
 wrote:
>
> When merging this series after Bruce changes on build macros, an old macro
> usage has been re-introduced.
>
> Fixes: d82d6ac64338 ("app/procinfo: add crypto security context info")
>
> Signed-off-by: David Marchand 
Acked-by: Thomas Monjalon 

Applied.


-- 
David Marchand



[dpdk-dev] [PATCH 1/2] common/mlx5: get number of ports that can be bonded

2020-10-26 Thread Xueming Li
Get HCA capability: number of physical ports that can be bonded.

Cc: sta...@dpdk.org

Signed-off-by: Xueming Li 
Acked-by: Matan Azrad 
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 5 +++--
 drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c 
b/drivers/common/mlx5/mlx5_devx_cmds.c
index 8aee12d527..e748d034d0 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -711,6 +711,7 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
attr->non_wire_sq = MLX5_GET(cmd_hca_cap, hcattr, non_wire_sq);
attr->log_max_static_sq_wq = MLX5_GET(cmd_hca_cap, hcattr,
  log_max_static_sq_wq);
+   attr->num_lag_ports = MLX5_GET(cmd_hca_cap, hcattr, num_lag_ports);
attr->dev_freq_khz = MLX5_GET(cmd_hca_cap, hcattr,
  device_frequency_khz);
attr->scatter_fcs_w_decap_disable =
@@ -1429,8 +1430,8 @@ mlx5_devx_cmd_create_tis(void *ctx,
tis_ctx = MLX5_ADDR_OF(create_tis_in, in, ctx);
MLX5_SET(tisc, tis_ctx, strict_lag_tx_port_affinity,
 tis_attr->strict_lag_tx_port_affinity);
-   MLX5_SET(tisc, tis_ctx, strict_lag_tx_port_affinity,
-tis_attr->strict_lag_tx_port_affinity);
+   MLX5_SET(tisc, tis_ctx, lag_tx_port_affinity,
+tis_attr->lag_tx_port_affinity);
MLX5_SET(tisc, tis_ctx, prio, tis_attr->prio);
MLX5_SET(tisc, tis_ctx, transport_domain,
 tis_attr->transport_domain);
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h 
b/drivers/common/mlx5/mlx5_devx_cmds.h
index abbea67784..3781fedd9e 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -99,6 +99,7 @@ struct mlx5_hca_attr {
uint32_t cross_channel:1;
uint32_t non_wire_sq:1; /* SQ with non-wire ops is supported. */
uint32_t log_max_static_sq_wq:5; /* Static WQE size SQ. */
+   uint32_t num_lag_ports:4; /* Number of ports can be bonded. */
uint32_t dev_freq_khz; /* Timestamp counter frequency, kHz. */
uint32_t scatter_fcs_w_decap_disable:1;
uint32_t regex:1;
-- 
2.25.1



[dpdk-dev] [PATCH 2/2] vdpa/mlx5: specify lag port affinity

2020-10-26 Thread Xueming Li
If TIS lag port affinity is set to auto, firmware assigns port affinity
on each creation in a round-robin manner. In case of 2 PFs, if a virtq
is created, destroyed and created again, then each virtq will get the
same port affinity.

To resolve this firmware limitation, this patch creates the TIS with a
specified affinity for each PF.

Cc: sta...@dpdk.org

Signed-off-by: Xueming Li 
Acked-by: Matan Azrad 
---
 drivers/vdpa/mlx5/mlx5_vdpa.c   |  3 +++
 drivers/vdpa/mlx5/mlx5_vdpa.h   |  3 ++-
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 23 ++-
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index a8f3e4b1de..2e17ed4fca 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -730,6 +730,9 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
}
priv->caps = attr.vdpa;
priv->log_max_rqt_size = attr.log_max_rqt_size;
+   priv->num_lag_ports = attr.num_lag_ports;
+   if (attr.num_lag_ports == 0)
+   priv->num_lag_ports = 1;
priv->ctx = ctx;
priv->pci_dev = pci_dev;
priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index fcbc12ab0c..c8c1adfde4 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -146,8 +146,9 @@ struct mlx5_vdpa_priv {
struct mlx5dv_devx_uar *uar;
struct rte_intr_handle intr_handle;
struct mlx5_devx_obj *td;
-   struct mlx5_devx_obj *tis;
+   struct mlx5_devx_obj *tiss[16]; /* TIS list for each LAG port. */
uint16_t nr_virtqs;
+   uint8_t num_lag_ports;
uint64_t features; /* Negotiated features. */
uint16_t log_max_rqt_size;
struct mlx5_vdpa_steer steer;
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c 
b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
index 17e71cf4f4..4724baca4e 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
@@ -103,12 +103,13 @@ void
 mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
 {
int i;
-
for (i = 0; i < priv->nr_virtqs; i++)
mlx5_vdpa_virtq_unset(&priv->virtqs[i]);
-   if (priv->tis) {
-   claim_zero(mlx5_devx_cmd_destroy(priv->tis));
-   priv->tis = NULL;
+   for (i = 0; i < priv->num_lag_ports; i++) {
+   if (priv->tiss[i]) {
+   claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
+   priv->tiss[i] = NULL;
+   }
}
if (priv->td) {
claim_zero(mlx5_devx_cmd_destroy(priv->td));
@@ -302,7 +303,7 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int 
index)
attr.hw_used_index = last_used_idx;
attr.q_size = vq.size;
attr.mkey = priv->gpa_mkey_index;
-   attr.tis_id = priv->tis->id;
+   attr.tis_id = priv->tiss[(index / 2) % priv->num_lag_ports]->id;
attr.queue_index = index;
attr.pd = priv->pdn;
virtq->virtq = mlx5_devx_cmd_create_virtq(priv->ctx, &attr);
@@ -432,10 +433,14 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
return -rte_errno;
}
tis_attr.transport_domain = priv->td->id;
-   priv->tis = mlx5_devx_cmd_create_tis(priv->ctx, &tis_attr);
-   if (!priv->tis) {
-   DRV_LOG(ERR, "Failed to create TIS.");
-   goto error;
+   for (i = 0; i < priv->num_lag_ports; i++) {
+   /* 0 is auto affinity, non-zero value to propose port. */
+   tis_attr.lag_tx_port_affinity = i + 1;
+   priv->tiss[i] = mlx5_devx_cmd_create_tis(priv->ctx, &tis_attr);
+   if (!priv->tiss[i]) {
+   DRV_LOG(ERR, "Failed to create TIS %u.", i);
+   goto error;
+   }
}
priv->nr_virtqs = nr_vring;
for (i = 0; i < nr_vring; i++)
-- 
2.25.1



[dpdk-dev] [PATCH v2 1/2] common/mlx5: add virtq attributes error fields

2020-10-26 Thread Xueming Li
Add the needed fields for virtq DevX object to read the error state.

Acked-by: Matan Azrad 
Signed-off-by: Xueming Li 
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 3 +++
 drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
 drivers/common/mlx5/mlx5_prm.h   | 9 +++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c 
b/drivers/common/mlx5/mlx5_devx_cmds.c
index 8aee12d527..dc426e9b09 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1754,6 +1754,9 @@ mlx5_devx_cmd_query_virtq(struct mlx5_devx_obj *virtq_obj,
attr->hw_available_index = MLX5_GET16(virtio_net_q, virtq,
  hw_available_index);
attr->hw_used_index = MLX5_GET16(virtio_net_q, virtq, hw_used_index);
+   attr->state = MLX5_GET16(virtio_net_q, virtq, state);
+   attr->error_type = MLX5_GET16(virtio_net_q, virtq,
+ virtio_q_context.error_type);
return ret;
 }
 
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h 
b/drivers/common/mlx5/mlx5_devx_cmds.h
index abbea67784..0ea2427b75 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -298,6 +298,7 @@ struct mlx5_devx_virtq_attr {
uint32_t size;
uint64_t offset;
} umems[3];
+   uint8_t error_type;
 };
 
 
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index d342263c85..7d671a3996 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -2280,7 +2280,8 @@ struct mlx5_ifc_virtio_q_bits {
u8 used_addr[0x40];
u8 available_addr[0x40];
u8 virtio_q_mkey[0x20];
-   u8 reserved_at_160[0x20];
+   u8 reserved_at_160[0x18];
+   u8 error_type[0x8];
u8 umem_1_id[0x20];
u8 umem_1_size[0x20];
u8 umem_1_offset[0x40];
@@ -2308,7 +2309,7 @@ struct mlx5_ifc_virtio_net_q_bits {
u8 vhost_log_page[0x5];
u8 reserved_at_90[0xc];
u8 state[0x4];
-   u8 error_type[0x8];
+   u8 reserved_at_a0[0x8];
u8 tisn_or_qpn[0x18];
u8 dirty_bitmap_mkey[0x20];
u8 dirty_bitmap_size[0x20];
@@ -2329,6 +2330,10 @@ struct mlx5_ifc_query_virtq_out_bits {
struct mlx5_ifc_virtio_net_q_bits virtq;
 };
 
+enum {
+   MLX5_EVENT_TYPE_OBJECT_CHANGE = 0x27,
+};
+
 enum {
MLX5_QP_ST_RC = 0x0,
 };
-- 
2.25.1



[dpdk-dev] [PATCH v2 2/2] vdpa/mlx5: hardware error handling

2020-10-26 Thread Xueming Li
When a hardware error happens, vdpa did not get such information and
left the driver silent: in a working state but with no response.

This patch subscribes to the firmware virtq error event and tries to
recover up to 3 times in 10 seconds, stopping the virtq if the maximum
retry number is reached.

When an error happens, the PMD logs at warning level. If it fails to
recover, it outputs an error log. Query virtq statistics to get the
error counters report.

Acked-by: Matan Azrad 
Signed-off-by: Xueming Li 
---
 drivers/vdpa/mlx5/mlx5_vdpa.c   |   2 +
 drivers/vdpa/mlx5/mlx5_vdpa.h   |  37 
 drivers/vdpa/mlx5/mlx5_vdpa_event.c | 142 
 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c |  61 +---
 4 files changed, 227 insertions(+), 15 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index a8f3e4b1de..ba779c10ee 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -283,6 +283,7 @@ mlx5_vdpa_dev_close(int vid)
}
if (priv->configured)
ret |= mlx5_vdpa_lm_log(priv);
+   mlx5_vdpa_err_event_unset(priv);
mlx5_vdpa_cqe_event_unset(priv);
mlx5_vdpa_steer_unset(priv);
mlx5_vdpa_virtqs_release(priv);
@@ -318,6 +319,7 @@ mlx5_vdpa_dev_config(int vid)
DRV_LOG(WARNING, "MTU cannot be set on device %s.",
vdev->device->name);
if (mlx5_vdpa_pd_create(priv) || mlx5_vdpa_mem_register(priv) ||
+   mlx5_vdpa_err_event_setup(priv) ||
mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
mlx5_vdpa_cqe_event_setup(priv)) {
mlx5_vdpa_dev_close(vid);
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
index fcbc12ab0c..0d6886c52c 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
@@ -87,6 +87,7 @@ struct mlx5_vdpa_virtq {
uint16_t vq_size;
uint8_t notifier_state;
bool stopped;
+   uint32_t version;
struct mlx5_vdpa_priv *priv;
struct mlx5_devx_obj *virtq;
struct mlx5_devx_obj *counters;
@@ -97,6 +98,8 @@ struct mlx5_vdpa_virtq {
uint32_t size;
} umems[3];
struct rte_intr_handle intr_handle;
+   uint64_t err_time[3]; /* RDTSC time of recent errors. */
+   uint32_t n_retry;
struct mlx5_devx_virtio_q_couners_attr reset;
 };
 
@@ -143,8 +146,10 @@ struct mlx5_vdpa_priv {
struct rte_vhost_memory *vmem;
uint32_t eqn;
struct mlx5dv_devx_event_channel *eventc;
+   struct mlx5dv_devx_event_channel *err_chnl;
struct mlx5dv_devx_uar *uar;
struct rte_intr_handle intr_handle;
+   struct rte_intr_handle err_intr_handle;
struct mlx5_devx_obj *td;
struct mlx5_devx_obj *tis;
uint16_t nr_virtqs;
@@ -259,6 +264,25 @@ int mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv);
  */
 void mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv);
 
+/**
+ * Setup error interrupt handler.
+ *
+ * @param[in] priv
+ *   The vdpa driver private structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int mlx5_vdpa_err_event_setup(struct mlx5_vdpa_priv *priv);
+
+/**
+ * Unset error event handler.
+ *
+ * @param[in] priv
+ *   The vdpa driver private structure.
+ */
+void mlx5_vdpa_err_event_unset(struct mlx5_vdpa_priv *priv);
+
 /**
  * Release a virtq and all its related resources.
  *
@@ -392,6 +416,19 @@ int mlx5_vdpa_virtq_modify(struct mlx5_vdpa_virtq *virtq, 
int state);
  */
 int mlx5_vdpa_virtq_stop(struct mlx5_vdpa_priv *priv, int index);
 
+/**
+ * Query virtq information.
+ *
+ * @param[in] priv
+ *   The vdpa driver private structure.
+ * @param[in] index
+ *   The virtq index.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int mlx5_vdpa_virtq_query(struct mlx5_vdpa_priv *priv, int index);
+
 /**
  * Get virtq statistics.
  *
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_event.c 
b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
index 8a01e42794..7d75b09757 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa_event.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa_event.c
@@ -15,11 +15,14 @@
 #include 
 
 #include 
+#include 
 
 #include "mlx5_vdpa_utils.h"
 #include "mlx5_vdpa.h"
 
 
+#define MLX5_VDPA_ERROR_TIME_SEC 3u
+
 void
 mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
 {
@@ -378,6 +381,145 @@ mlx5_vdpa_interrupt_handler(void *cb_arg)
pthread_mutex_unlock(&priv->vq_config_lock);
 }
 
+static void
+mlx5_vdpa_err_interrupt_handler(void *cb_arg __rte_unused)
+{
+#ifdef HAVE_IBV_DEVX_EVENT
+   struct mlx5_vdpa_priv *priv = cb_arg;
+   union {
+   struct mlx5dv_devx_async_event_hdr event_resp;
+   uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
+   } out;
+   uint32_t vq_index, i, version;
+   struct mlx5_vdpa_virtq *virtq;
+   uint64_t sec;
+
+   pthread_mutex_lock(&priv->vq_config_lock);
+  

Re: [dpdk-dev] [PATCH v1] net/ice: refactor dynamic mbuf in data extraction

2020-10-26 Thread Wang, Haiyue
Hi Olivier,

> -Original Message-
> From: Olivier Matz 
> Sent: Monday, October 26, 2020 18:22
> To: Wang, Haiyue 
> Cc: dev@dpdk.org; Yigit, Ferruh ; Guo, Jia 
> ; Zhang, Qi Z
> ; Chen, Zhaoyan ; Yang, Qiming 
> ;
> Ray Kinsella ; Neil Horman 
> Subject: Re: [dpdk-dev] [PATCH v1] net/ice: refactor dynamic mbuf in data 
> extraction
> 
> Hi Haiyue,
> 
> On Sun, Oct 25, 2020 at 03:13:52PM +0800, Haiyue Wang wrote:
> > Current dynamic mbuf design is that the driver will register the needed
> > field and flags at the device probing time, this will make iavf PMD use
> > different names to register the dynamic mbuf field and flags, but both
> > of them use the exactly same protocol extraction metadata.
> >
> > This will run out of the limited dynamic mbuf resource, meanwhile, the
> > application has to handle the dynamic mbuf separately.
> >
> > For making things simple and consistent, refactor dynamic mbuf in data
> > extraction handling: the PMD just lookups the same name at the queue
> > setup time after the application registers it.
> >
> > In other words, make the dynamic mbuf string name as API, not the data
> > object which is defined in each PMD.
> 
> In case the dynamic mbuf field is shared by several PMDs, it seems to
> be indeed a better solution.
> 
> Currently, the "union rte_pmd_proto_xtr_metadata" is still defined in
> rte_pmd_ice.h. Will it be the same for iavf, and will it be factorized
> somewhere? However I don't know where could be a good place.
> 
> There is already lib/librte_mbuf/rte_mbuf_dyn.h which is the place to
> centralize the name and description of dynamic fields/flags used in
> libraries. But I think neither structure definitions nor PMD-specific
> flags should go there too. I'd prefer to have them in
> drivers/net/, but I'm not sure it is possible.

May be new 'lib/librte_mbuf/rte_mbuf_dyn_pmd.h' for all PMDs specific ?
So that the application knows exactly how many *dynamic* things. Also,
a new API to query the dynamic information + dev_ops may be introduced
in next release cycle, then 'rte_pmd_mlx5_get_dyn_flag_names' can be
removed. And the application will be clean.

Currently, we use " #define __INTEL_RX_FLEX_DESC_METADATA__ " to fix the
duplicated definition, but the application has to include the two header
files like "rte_pmd_ice.h" / "rte_pmd_iavf.h"

> 
> Also, it is difficult from the patch to see the impact it has on an
> application that was using these metadata. Should we have an example
> of use?
> 

Thanks your link in previous mail:
http://inbox.dpdk.org/dev/20191030165626.w3flq5wdpitpsv2v@platinum/

Original patch uses: Solution 1, provide static inline helpers to access to the
dyn fields/flags

Now: Solution 2, without global variable export and helpers: the application
calls rte_mbuf_dynfield_register(&rte_pmd_ice_proto_xtr_metadata_param) to get
the offset, and store it privately.

https://patchwork.dpdk.org/patch/82165/
In v3 patch, I kept the metadata format, and rename it to be more generic:
'union rte_pmd_proto_xtr_metadata', but no dump function as the original design.

> Thanks,
> Olivier
> 
> 
> > Signed-off-by: Haiyue Wang 
> > ---
> > 2.29.0
> >


Re: [dpdk-dev] [PATCH v8] net/iavf: support flex desc metadata extraction

2020-10-26 Thread Wang, Haiyue
Hi Olivier,

> -Original Message-
> From: Olivier Matz 
> Sent: Monday, October 26, 2020 17:37
> To: Yigit, Ferruh 
> Cc: Guo, Jia ; Wu, Jingjing ; 
> Zhang, Qi Z
> ; Xing, Beilei ; dev@dpdk.org; 
> Wang, Haiyue
> ; Richardson, Bruce 
> Subject: Re: [PATCH v8] net/iavf: support flex desc metadata extraction
> 
> Hi,
> 
> On Wed, Oct 14, 2020 at 01:31:39PM +0100, Ferruh Yigit wrote:
> > On 10/13/2020 9:17 AM, Jeff Guo wrote:
> > > Enable metadata extraction for flexible descriptors in AVF, that would
> > > allow network function directly get metadata without additional parsing
> > > which would reduce the CPU cost for VFs. The enabling metadata
> > > extractions involve the metadata of VLAN/IPv4/IPv6/IPv6-FLOW/TCP/MPLS
> > > flexible descriptors, and the VF could negotiate the capability of
> > > the flexible descriptor with PF and correspondingly configure the
> > > specific offload at receiving queues.
> > >
> > > Signed-off-by: Jeff Guo 
> > > Acked-by: Haiyue Wang 
> 
> [...]
> 
> > > +EXPERIMENTAL {
> > > +global:
> > > +
> > > +# added in 20.11
> > > +rte_net_iavf_dynfield_proto_xtr_metadata_offs;
> > > +rte_net_iavf_dynflag_proto_xtr_vlan_mask;
> > > +rte_net_iavf_dynflag_proto_xtr_ipv4_mask;
> > > +rte_net_iavf_dynflag_proto_xtr_ipv6_mask;
> > > +rte_net_iavf_dynflag_proto_xtr_ipv6_flow_mask;
> > > +rte_net_iavf_dynflag_proto_xtr_tcp_mask;
> > > +rte_net_iavf_dynflag_proto_xtr_ip_offset_mask;
> >
> > As a namespace previously "rte_pmd_xxx" was used for PMD specific APIs, can
> > you please switch to that?
> > 'rte_net_' is used by the 'librte_net' library.
> >
> > Above list is the dynfield values, what is the correct usage for dynfields,
> > 1- Put dynfileds names in to the header, and application does a lookup
> > ('rte_mbuf_dynfield_lookup()') to get the dynfield values.
> > or
> > 2- Expose dynfield values to be accessed directly from application, as done 
> > above.
> >
> > @Oliver, can you please support.
> >
> > I can see (1) has advantage of portability if more than one PMD supports
> > same dynfield names, but that sees not a case for above ones.
> 
> If I understand the question correctly, this is the same that was
> discussed here:
> 
>   http://inbox.dpdk.org/dev/20191030165626.w3flq5wdpitpsv2v@platinum/
> 
> To me, exporting the variables containing the dynfield offsets is easier
> to use: we don't need to have additional private variables to store them
> in each API users (usually one static variable per file, which can be
> heavy).

No issue for one PMD, but if two PMDs share the same dynfields, the application
has to use two namespace variables to access the same value, like:

if (mb->ol_flags & PMD_A_DYNFIELD_B_MASK)

else if (mb->ol_flags & PMD_B_DYNFIELD_B_MASK)

This makes the application code a little duplicated. ;-)

> 
> Olivier


[dpdk-dev] [PATCH] mlx5: add ConnectX-7 and Bluefield-3 device IDs

2020-10-26 Thread Raslan Darawsheh
This adds the ConnectX-7 and Bluefield-3 device ids to the list of
supported Mellanox devices that run the MLX5 PMDs.
The devices are still in the development stage.

Signed-off-by: Raslan Darawsheh 
---
 drivers/common/mlx5/mlx5_common.h | 2 ++
 drivers/net/mlx5/mlx5.c   | 8 
 drivers/regex/mlx5/mlx5_regex.c   | 4 
 drivers/vdpa/mlx5/mlx5_vdpa.c | 8 
 4 files changed, 22 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_common.h 
b/drivers/common/mlx5/mlx5_common.h
index ed44a45a81..93985bf2c9 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -132,6 +132,8 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF = 0x101e,
PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF = 0xa2d6,
PCI_DEVICE_ID_MELLANOX_CONNECTX6LX = 0x101f,
+   PCI_DEVICE_ID_MELLANOX_CONNECTX7 = 0x1021,
+   PCI_DEVICE_ID_MELLANOX_CONNECTX7BF = 0Xa2dc,
 };
 
 /* Maximum number of simultaneous unicast MAC addresses. */
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 77aeac85c5..0316253ca8 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2122,6 +2122,14 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
PCI_DEVICE_ID_MELLANOX_CONNECTX6LX)
},
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+   PCI_DEVICE_ID_MELLANOX_CONNECTX7)
+   },
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+   PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
+   },
{
.vendor_id = 0
}
diff --git a/drivers/regex/mlx5/mlx5_regex.c b/drivers/regex/mlx5/mlx5_regex.c
index 35e8ea0a9a..17590b937c 100644
--- a/drivers/regex/mlx5/mlx5_regex.c
+++ b/drivers/regex/mlx5/mlx5_regex.c
@@ -254,6 +254,10 @@ static const struct rte_pci_id mlx5_regex_pci_id_map[] = {
RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
},
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+   PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
+   },
{
.vendor_id = 0
}
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index a8f3e4b1de..2d88633bfd 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -825,6 +825,14 @@ static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
},
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+   PCI_DEVICE_ID_MELLANOX_CONNECTX7)
+   },
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+   PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
+   },
{
.vendor_id = 0
}
-- 
2.17.1



[dpdk-dev] [PATCH v4 2/6] net/mlx5: configure Rx queue to support split

2020-10-26 Thread Viacheslav Ovsiienko
The scatter-gather elements should be configured
accordingly to support the buffer split feature.
The application provides the desired settings for
the segments at the beginning of the packets and
PMD pads the buffer chain (if needed) with attributes
of last specified segment to accommodate the packet
of maximal length.

Some limitations are implied. The MPRQ
feature must be disengaged if split is requested,
because MPRQ neither supports pushing data to the
dedicated pools nor follows the flexible buffer sizes.
The vectorized rx_burst routines do not support
scattering (these ones are extremely simplified
and work over a single segment only) and cannot
handle split either.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5_rxq.c | 85 +
 1 file changed, 79 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 72d76c1..7695d62 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1402,9 +1402,82 @@ struct mlx5_rxq_ctrl *
 * the vector Rx will not be used.
 */
uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
+   const struct rte_eth_rxseg_split *qs_seg = rx_seg;
+   unsigned int tail_len;
 
-   if (non_scatter_min_mbuf_size > mb_len && !(offloads &
-   DEV_RX_OFFLOAD_SCATTER)) {
+   tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
+  desc_n * sizeof(struct rte_mbuf *), 0, socket);
+   if (!tmpl) {
+   rte_errno = ENOMEM;
+   return NULL;
+   }
+   MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG);
+   /*
+* Build the array of actual buffer offsets and lengths.
+* Pad with the buffers from the last memory pool if
+* needed to handle max size packets, replace zero length
+* with the buffer length from the pool.
+*/
+   tail_len = max_rx_pkt_len;
+   do {
+   struct mlx5_eth_rxseg *hw_seg =
+   &tmpl->rxq.rxseg[tmpl->rxq.rxseg_n];
+   uint32_t buf_len, offset, seg_len;
+
+   /*
+* For the buffers beyond descriptions offset is zero,
+* the first buffer contains head room.
+*/
+   buf_len = rte_pktmbuf_data_room_size(qs_seg->mp);
+   offset = (tmpl->rxq.rxseg_n >= n_seg ? 0 : qs_seg->offset) +
+(tmpl->rxq.rxseg_n ? 0 : RTE_PKTMBUF_HEADROOM);
+   /*
+* For the buffers beyond descriptions the length is
+* pool buffer length, zero lengths are replaced with
+* pool buffer length either.
+*/
+   seg_len = tmpl->rxq.rxseg_n >= n_seg ? buf_len :
+  qs_seg->length ?
+  qs_seg->length :
+  (buf_len - offset);
+   /* Check is done in long int, now overflows. */
+   if (buf_len < seg_len + offset) {
+   DRV_LOG(ERR, "port %u Rx queue %u: Split offset/length "
+"%u/%u can't be satisfied",
+dev->data->port_id, idx,
+qs_seg->length, qs_seg->offset);
+   rte_errno = EINVAL;
+   goto error;
+   }
+   if (seg_len > tail_len)
+   seg_len = buf_len - offset;
+   if (++tmpl->rxq.rxseg_n > MLX5_MAX_RXQ_NSEG) {
+   DRV_LOG(ERR,
+   "port %u too many SGEs (%u) needed to handle"
+   " requested maximum packet size %u, the maximum"
+   " supported are %u", dev->data->port_id,
+   tmpl->rxq.rxseg_n, max_rx_pkt_len,
+   MLX5_MAX_RXQ_NSEG);
+   rte_errno = ENOTSUP;
+   goto error;
+   }
+   /* Build the actual scattering element in the queue object. */
+   hw_seg->mp = qs_seg->mp;
+   MLX5_ASSERT(offset <= UINT16_MAX);
+   MLX5_ASSERT(seg_len <= UINT16_MAX);
+   hw_seg->offset = (uint16_t)offset;
+   hw_seg->length = (uint16_t)seg_len;
+   /*
+* Advance the segment descriptor, the padding is the based
+* on the attributes of the last descriptor.
+*/
+   if (tmpl->rxq.rxseg_n < n_seg)
+   qs_seg++;
+   tail_len -= RTE_MIN(tail_len, seg_len);
+   } while (tail_len || !rte_is_power_of_2(tmpl->rxq.rxseg_n)

[dpdk-dev] [PATCH v4 0/6] net/mlx5: add Rx buffer split support

2020-10-26 Thread Viacheslav Ovsiienko
This patch adds to PMD the functionality for the receiving
buffer split feature [1]

[1] http://patches.dpdk.org/patch/81154/

Signed-off-by: Viacheslav Ovsiienko 

---
v1: http://patches.dpdk.org/patch/81808/

v2: http://patches.dpdk.org/patch/81923/
- typos
- documentation is updated

v3: http://patches.dpdk.org/patch/82177/
- extra parameter checks in PMD rx_queue_setup removed
- minor optimizations in PMD

v4: - rebasing

Viacheslav Ovsiienko (6):
  net/mlx5: add extended Rx queue setup routine
  net/mlx5: configure Rx queue to support split
  net/mlx5: register multiple pool for Rx queue
  net/mlx5: update Rx datapath to support split
  net/mlx5: report Rx segmentation capabilities
  doc: add buffer split feature limitation to mlx5 guide

 doc/guides/nics/mlx5.rst|   6 +-
 drivers/net/mlx5/mlx5.h |   3 +
 drivers/net/mlx5/mlx5_ethdev.c  |   4 ++
 drivers/net/mlx5/mlx5_mr.c  |   3 +
 drivers/net/mlx5/mlx5_rxq.c | 136 +++-
 drivers/net/mlx5/mlx5_rxtx.c|   3 +-
 drivers/net/mlx5/mlx5_rxtx.h|  13 +++-
 drivers/net/mlx5/mlx5_trigger.c |  20 +++---
 8 files changed, 160 insertions(+), 28 deletions(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH v4 4/6] net/mlx5: update Rx datapath to support split

2020-10-26 Thread Viacheslav Ovsiienko
Only the regular rx_burst routine is updated to support split,
because the vectorized ones do not support scatter and MPRQ
does not support split at all.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5_rxq.c  | 11 +--
 drivers/net/mlx5/mlx5_rxtx.c |  3 ++-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 7695d62..f9aed38 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -212,9 +212,10 @@
 
/* Iterate on segments. */
for (i = 0; (i != elts_n); ++i) {
+   struct mlx5_eth_rxseg *seg = &rxq_ctrl->rxq.rxseg[i % sges_n];
struct rte_mbuf *buf;
 
-   buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
+   buf = rte_pktmbuf_alloc(seg->mp);
if (buf == NULL) {
DRV_LOG(ERR, "port %u empty mbuf pool",
PORT_ID(rxq_ctrl->priv));
@@ -227,12 +228,10 @@
MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0);
MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0);
MLX5_ASSERT(!buf->next);
-   /* Only the first segment keeps headroom. */
-   if (i % sges_n)
-   SET_DATA_OFF(buf, 0);
+   SET_DATA_OFF(buf, seg->offset);
PORT(buf) = rxq_ctrl->rxq.port_id;
-   DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
-   PKT_LEN(buf) = DATA_LEN(buf);
+   DATA_LEN(buf) = seg->length;
+   PKT_LEN(buf) = seg->length;
NB_SEGS(buf) = 1;
(*rxq_ctrl->rxq.elts)[i] = buf;
}
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index dbb427b..2ffacf8 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1356,7 +1356,8 @@ enum mlx5_txcmp_code {
rte_prefetch0(seg);
rte_prefetch0(cqe);
rte_prefetch0(wqe);
-   rep = rte_mbuf_raw_alloc(rxq->mp);
+   /* Allocate the buf from the same pool. */
+   rep = rte_mbuf_raw_alloc(seg->pool);
if (unlikely(rep == NULL)) {
++rxq->stats.rx_nombuf;
if (!pkt) {
-- 
1.8.3.1



[dpdk-dev] [PATCH v4 1/6] net/mlx5: add extended Rx queue setup routine

2020-10-26 Thread Viacheslav Ovsiienko
A routine to provide Rx queue setup specifying an
extended receiving buffer description is added.
It allows the application to specify desired segment
lengths, data position offsets in the buffer,
and a dedicated memory pool for each segment.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5.h  |  3 +++
 drivers/net/mlx5/mlx5_rxq.c  | 39 ++-
 drivers/net/mlx5/mlx5_rxtx.h | 13 -
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index bb954c4..258be03 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -164,6 +164,9 @@ struct mlx5_stats_ctrl {
 /* Maximal size of aggregated LRO packet. */
 #define MLX5_MAX_LRO_SIZE (UINT8_MAX * MLX5_LRO_SEG_CHUNK_SIZE)
 
+/* Maximal number of segments to split. */
+#define MLX5_MAX_RXQ_NSEG (1u << MLX5_MAX_LOG_RQ_SEGS)
+
 /* LRO configurations structure. */
 struct mlx5_lro_config {
uint32_t supported:1; /* Whether LRO is supported. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 0176ece..72d76c1 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -744,12 +744,40 @@
struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+   struct rte_eth_rxseg_split *rx_seg =
+   (struct rte_eth_rxseg_split *)conf->rx_seg;
+   struct rte_eth_rxseg_split rx_single = {.mp = mp};
+   uint16_t n_seg = conf->rx_nseg;
int res;
 
+   if (mp) {
+   /*
+* The parameters should be checked on rte_eth_dev layer.
+* If mp is specified it means the compatible configuration
+* without buffer split feature tuning.
+*/
+   rx_seg = &rx_single;
+   n_seg = 1;
+   }
+   if (n_seg > 1) {
+   uint64_t offloads = conf->offloads |
+   dev->data->dev_conf.rxmode.offloads;
+
+   /* The offloads should be checked on rte_eth_dev layer. */
+   MLX5_ASSERT(offloads & DEV_RX_OFFLOAD_SCATTER);
+   if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) {
+   DRV_LOG(ERR, "port %u queue index %u split "
+"offload not configured",
+dev->data->port_id, idx);
+   rte_errno = ENOSPC;
+   return -rte_errno;
+   }
+   MLX5_ASSERT(n_seg < MLX5_MAX_RXQ_NSEG);
+   }
res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
if (res)
return res;
-   rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
+   rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, rx_seg, n_seg);
if (!rxq_ctrl) {
DRV_LOG(ERR, "port %u unable to allocate queue index %u",
dev->data->port_id, idx);
@@ -1342,11 +1370,11 @@
 struct mlx5_rxq_ctrl *
 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 unsigned int socket, const struct rte_eth_rxconf *conf,
-struct rte_mempool *mp)
+const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg)
 {
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *tmpl;
-   unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+   unsigned int mb_len = rte_pktmbuf_data_room_size(rx_seg[0].mp);
struct mlx5_dev_config *config = &priv->config;
uint64_t offloads = conf->offloads |
   dev->data->dev_conf.rxmode.offloads;
@@ -1358,7 +1386,8 @@ struct mlx5_rxq_ctrl *
RTE_PKTMBUF_HEADROOM;
unsigned int max_lro_size = 0;
unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
-   const int mprq_en = mlx5_check_mprq_support(dev) > 0;
+   const int mprq_en = mlx5_check_mprq_support(dev) > 0 && n_seg == 1 &&
+   !rx_seg[0].offset && !rx_seg[0].length;
unsigned int mprq_stride_nums = config->mprq.stride_num_n ?
config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
unsigned int mprq_stride_size = non_scatter_min_mbuf_size <=
@@ -1544,7 +1573,7 @@ struct mlx5_rxq_ctrl *
(!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
tmpl->rxq.port_id = dev->data->port_id;
tmpl->priv = priv;
-   tmpl->rxq.mp = mp;
+   tmpl->rxq.mp = rx_seg[0].mp;
tmpl->rxq.elts_n = log2above(desc);
tmpl->rxq.rq_repl_thresh =
MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 1b35a26..f204f7e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_r

[dpdk-dev] [PATCH v4 3/6] net/mlx5: register multiple pool for Rx queue

2020-10-26 Thread Viacheslav Ovsiienko
The split feature for receiving packets was added to the mlx5
PMD; now the Rx queue can receive data into buffers belonging
to different pools, and the memory of all the involved pools
must be registered for DMA operations in order to allow the
hardware to store the data.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5_mr.c  |  3 +++
 drivers/net/mlx5/mlx5_trigger.c | 20 
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index dbcf0aa..c308ecc 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -536,6 +536,9 @@ struct mr_update_mp_data {
.ret = 0,
};
 
+   DRV_LOG(DEBUG, "Port %u Rx queue registering mp %s "
+  "having %u chunks.", dev->data->port_id,
+  mp->name, mp->nb_mem_chunks);
rte_mempool_mem_iter(mp, mlx5_mr_update_mp_cb, &data);
if (data.ret < 0 && rte_errno == ENXIO) {
/* Mempool may have externally allocated memory. */
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7735f02..19f2d66 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -145,18 +145,22 @@
dev->data->port_id, priv->sh->device_attr.max_sge);
for (i = 0; i != priv->rxqs_n; ++i) {
struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
-   struct rte_mempool *mp;
 
if (!rxq_ctrl)
continue;
if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
-   /* Pre-register Rx mempool. */
-   mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
-rxq_ctrl->rxq.mprq_mp : rxq_ctrl->rxq.mp;
-   DRV_LOG(DEBUG, "Port %u Rx queue %u registering mp %s"
-   " having %u chunks.", dev->data->port_id,
-   rxq_ctrl->rxq.idx, mp->name, mp->nb_mem_chunks);
-   mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
+   /* Pre-register Rx mempools. */
+   if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
+   mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
+ rxq_ctrl->rxq.mprq_mp);
+   } else {
+   uint32_t s;
+
+   for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
+   mlx5_mr_update_mp
+   (dev, &rxq_ctrl->rxq.mr_ctrl,
+   rxq_ctrl->rxq.rxseg[s].mp);
+   }
ret = rxq_alloc_elts(rxq_ctrl);
if (ret)
goto error;
-- 
1.8.3.1



[dpdk-dev] [PATCH v4 6/6] doc: add buffer split feature limitation to mlx5 guide

2020-10-26 Thread Viacheslav Ovsiienko
The buffer split feature is mentioned in the mlx5 PMD
documentation; the limitation description is added
as well.

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 doc/guides/nics/mlx5.rst | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 66524f1..8dc7c62 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -59,7 +59,8 @@ Features
 
 - Multi arch support: x86_64, POWER8, ARMv8, i686.
 - Multiple TX and RX queues.
-- Support for scattered TX and RX frames.
+- Support for scattered TX frames.
+- Advanced support for scattered Rx frames with tunable buffer attributes.
 - IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6 RSS on any number of queues.
 - RSS using different combinations of fields: L3 only, L4 only or both,
   and source only, destination only or both.
@@ -187,6 +188,9 @@ Limitations
the device. In case of ungraceful program termination, some entries may
remain present and should be removed manually by other means.
 
+- Buffer split offload is supported with regular Rx burst routine only,
+  no MPRQ feature or vectorized code can be engaged.
+
 - When Multi-Packet Rx queue is configured (``mprq_en``), a Rx packet can be
   externally attached to a user-provided mbuf with having EXT_ATTACHED_MBUF in
   ol_flags. As the mempool for the external buffer is managed by PMD, all the
-- 
1.8.3.1



[dpdk-dev] [PATCH v4 5/6] net/mlx5: report Rx segmentation capabilities

2020-10-26 Thread Viacheslav Ovsiienko
Add rte_eth_dev_info->rx_seg_capa parameters:
  - receiving to multiple pools is supported
  - buffer offsets are supported
  - no offset alignment requirement
  - reports the maximal number of segments
  - reports the buffer split offload flag

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5_ethdev.c | 4 
 drivers/net/mlx5/mlx5_rxq.c| 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index c70cd30..fc04fc8 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -306,6 +306,10 @@
info->max_tx_queues = max;
info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES;
info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
+   info->rx_seg_capa.max_nseg = MLX5_MAX_RXQ_NSEG;
+   info->rx_seg_capa.multi_pools = 1;
+   info->rx_seg_capa.offset_allowed = 1;
+   info->rx_seg_capa.offset_align_log2 = 0;
info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
 info->rx_queue_offload_capa);
info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index f9aed38..1cc477a 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -402,6 +402,7 @@
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_dev_config *config = &priv->config;
uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER |
+RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT |
 DEV_RX_OFFLOAD_TIMESTAMP |
 DEV_RX_OFFLOAD_JUMBO_FRAME |
 DEV_RX_OFFLOAD_RSS_HASH);
-- 
1.8.3.1



Re: [dpdk-dev] [PATCH] baseband/acc100: remove logically dead code

2020-10-26 Thread Mcnamara, John


> -Original Message-
> From: dev  On Behalf Of Tom Rix
> Sent: Wednesday, October 21, 2020 2:58 PM
> To: Chautru, Nicolas ; dev@dpdk.org;
> akhil.go...@nxp.com
> Cc: Richardson, Bruce 
> Subject: Re: [dpdk-dev] [PATCH] baseband/acc100: remove logically dead
> code
> 
> 
> On 10/19/20 2:09 PM, Nicolas Chautru wrote:
> > Coverity reported dead code for a few error checks which are indeed
> > not reachable.
> >
> > Coverity issue: 363451, 363454, 363455
> >
> > Signed-off-by: Nicolas Chautru 
> 
> This change looks fine.
> 
> Should remove the 'Coverity issue: ... ' from the log.
> 
> Reviewed-by: Tom Rix 

It should stay. That is the convention we use:

git log | grep "Coverity issue"


[dpdk-dev] [PATCH] ethdev: fix data type for port id

2020-10-26 Thread wangyunjian
From: Yunjian Wang 

The ethdev port id should be 16 bits now. This patch fixes the data
type of the variable for 'pid', changing from uint32_t to uint16_t.

Fixes: 5b7ba31148a8 ("ethdev: add port ownership")
Cc: sta...@dpdk.org

Signed-off-by: Yunjian Wang 
---
 lib/librte_ethdev/rte_ethdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index b12bb3854d..d52215b9a7 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -816,7 +816,7 @@ rte_eth_dev_get_name_by_port(uint16_t port_id, char *name)
 int
 rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id)
 {
-   uint32_t pid;
+   uint16_t pid;
 
if (name == NULL) {
RTE_ETHDEV_LOG(ERR, "Null pointer is specified\n");
-- 
2.23.0



Re: [dpdk-dev] [PATCH] ethdev: fix data type for port id

2020-10-26 Thread Thomas Monjalon
26/10/2020 13:24, wangyunjian:
> From: Yunjian Wang 
> 
> The ethdev port id should be 16 bits now. This patch fixes the data
> type of the variable for 'pid', changing from uint32_t to uint16_t.
> 
> Fixes: 5b7ba31148a8 ("ethdev: add port ownership")

It was 32-bit on purpose, to avoid overflow in this loop:
for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++)

It is now replaced by RTE_ETH_FOREACH_VALID_DEV,
but I wonder whether we still have this theoretical overflow risk.
If yes, we should change more variables to 32-bit.





Re: [dpdk-dev] [PATCH] ethdev: fix data type for port id

2020-10-26 Thread Andrew Rybchenko
On 10/26/20 3:24 PM, wangyunjian wrote:
> From: Yunjian Wang 
> 
> The ethdev port id should be 16 bits now. This patch fixes the data
> type of the variable for 'pid', changing from uint32_t to uint16_t.
> 
> Fixes: 5b7ba31148a8 ("ethdev: add port ownership")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Yunjian Wang 

Reviewed-by: Andrew Rybchenko 



Re: [dpdk-dev] [PATCH] ethdev: fix data type for port id

2020-10-26 Thread Andrew Rybchenko
On 10/26/20 3:29 PM, Thomas Monjalon wrote:
> 26/10/2020 13:24, wangyunjian:
>> From: Yunjian Wang 
>>
>> The ethdev port id should be 16 bits now. This patch fixes the data
>> type of the variable for 'pid', changing from uint32_t to uint16_t.
>>
>> Fixes: 5b7ba31148a8 ("ethdev: add port ownership")
> 
> It was 32-bit on purpose, to avoid overflow in this loop:
>   for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++)
> 
> It is now replaced by RTE_ETH_FOREACH_VALID_DEV,
> > but I wonder whether we still have this theoretical overflow risk.
> If yes, we should change more variables to 32-bit.

Ah, it is too tricky. Maybe it is better to ensure that
RTE_MAX_ETHPORTS is less than or equal to UINT16_MAX?




Re: [dpdk-dev] [PATCH] ethdev: fix data type for port id

2020-10-26 Thread Thomas Monjalon
26/10/2020 13:33, Andrew Rybchenko:
> On 10/26/20 3:29 PM, Thomas Monjalon wrote:
> > 26/10/2020 13:24, wangyunjian:
> >> From: Yunjian Wang 
> >>
> >> The ethdev port id should be 16 bits now. This patch fixes the data
> >> type of the variable for 'pid', changing from uint32_t to uint16_t.
> >>
> >> Fixes: 5b7ba31148a8 ("ethdev: add port ownership")
> > 
> > It was 32-bit on purpose, to avoid overflow in this loop:
> > for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++)
> > 
> > It is now replaced by RTE_ETH_FOREACH_VALID_DEV,
> > > but I wonder whether we still have this theoretical overflow risk.
> > If yes, we should change more variables to 32-bit.
> 
> Ah, it is too tricky. Maybe it is better to ensure that
> RTE_MAX_ETHPORTS is less than or equal to UINT16_MAX?

Yes could be another option.




Re: [dpdk-dev] [PATCH v5 1/7] app/bbdev: add explicit ut for latency vs validation

2020-10-26 Thread Tom Rix


On 10/23/20 4:42 PM, Nicolas Chautru wrote:
> Adding explicit different ut when testing for validation
> or latency (early termination enabled or not).
>
> Signed-off-by: Nicolas Chautru 
> Acked-by: Aidan Goddard 
> Acked-by: Dave Burley 
> ---
>  app/test-bbdev/test_bbdev_perf.c | 92 
> ++--
Should update the copyright.
>  1 file changed, 88 insertions(+), 4 deletions(-)
>
> diff --git a/app/test-bbdev/test_bbdev_perf.c 
> b/app/test-bbdev/test_bbdev_perf.c
> index 6e5535d..3554a77 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -3999,12 +3999,14 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   return i;
>  }
>  
> +/* Test case for latency/validation for LDPC Decoder */
>  static int
>  latency_test_ldpc_dec(struct rte_mempool *mempool,
>   struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
>   int vector_mask, uint16_t dev_id, uint16_t queue_id,
>   const uint16_t num_to_process, uint16_t burst_sz,
> - uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
> + uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
> + bool disable_et)
>  {
>   int ret = TEST_SUCCESS;
>   uint16_t i, j, dequeued;
> @@ -4026,7 +4028,7 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   "rte_bbdev_dec_op_alloc_bulk() failed");
>  
>   /* For latency tests we need to disable early termination */
> - if (check_bit(ref_op->ldpc_dec.op_flags,
> + if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
>   RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
>   ref_op->ldpc_dec.op_flags -=
>   RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
Bit clearing is usually done with &= ~()
> @@ -4248,7 +4250,7 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
>  
>   printf("+ --- +\n");
> - printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: 
> %u, op type: %s\n",
> + printf("== test: latency\ndev: %s, burst size: %u, num ops: %u, op 
> type: %s\n",
>   info.dev_name, burst_sz, num_to_process, op_type_str);
>  
>   if (op_type == RTE_BBDEV_OP_TURBO_DEC)
> @@ -4270,7 +4272,83 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   iter = latency_test_ldpc_dec(op_params->mp, bufs,
>   op_params->ref_dec_op, op_params->vector_mask,
>   ad->dev_id, queue_id, num_to_process,
> + burst_sz, &total_time, &min_time, &max_time,
> + true);
> + else
> + iter = latency_test_enc(op_params->mp, bufs,
> + op_params->ref_enc_op,
> + ad->dev_id, queue_id,
> + num_to_process, burst_sz, &total_time,
> + &min_time, &max_time);

This is a repeat of RTE_BBDEV_OP_TURBO_ENC.

Do not need both.

If the point is to have a else and not fail when the op_type is unknown, then

remove the earlier all and comment the else something like

else /* RTE_BBDEC_OP_TURBO_ENC */

> +
> + if (iter <= 0)
> + return TEST_FAILED;
> +
> + printf("Operation latency:\n"
> + "\tavg: %lg cycles, %lg us\n"
> + "\tmin: %lg cycles, %lg us\n"
> + "\tmax: %lg cycles, %lg us\n",
> + (double)total_time / (double)iter,
> + (double)(total_time * 100) / (double)iter /
> + (double)rte_get_tsc_hz(), (double)min_time,
> + (double)(min_time * 100) / (double)rte_get_tsc_hz(),
> + (double)max_time, (double)(max_time * 100) /
> + (double)rte_get_tsc_hz());
Could remove a tab from the last 9 lines for better alignment with printf
> +
> + return TEST_SUCCESS;
> +}
> +
> +static int
> +validation_test(struct active_device *ad,
> + struct test_op_params *op_params)
> +{
> + int iter;
> + uint16_t burst_sz = op_params->burst_sz;
> + const uint16_t num_to_process = op_params->num_to_process;
> + const enum rte_bbdev_op_type op_type = test_vector.op_type;
> + const uint16_t queue_id = ad->queue_ids[0];
> + struct test_buffers *bufs = NULL;
> + struct rte_bbdev_info info;
> + uint64_t total_time, min_time, max_time;
> + const char *op_type_str;
> +
> + total_time = max_time = 0;
> + min_time = UINT64_MAX;
> +
> + TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
> + "BURST_SIZE should be <= %u

Re: [dpdk-dev] [PATCH v5 2/7] app/bbdev: add explicit check for counters

2020-10-26 Thread Tom Rix


On 10/23/20 4:42 PM, Nicolas Chautru wrote:
> Adding explicit check in ut that the stats counters
> have the expect values. Was missing for coverage.

missing from coverage

?

>
> Signed-off-by: Nicolas Chautru 
> Acked-by: Aidan Goddard 
> Acked-by: Dave Burley 
> ---
>  app/test-bbdev/test_bbdev_perf.c | 17 +
>  1 file changed, 17 insertions(+)
>
> diff --git a/app/test-bbdev/test_bbdev_perf.c 
> b/app/test-bbdev/test_bbdev_perf.c
> index 3554a77..b62848e 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -4840,6 +4840,23 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   (double)(time_st.deq_max_time * 100) /
>   rte_get_tsc_hz());
>  
> + struct rte_bbdev_stats stats = {0};
Other calls to get_bbdev_queue_stats do not initialize stats and likely should
> + get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
Should check the return here.
> + if (op_type != RTE_BBDEV_OP_LDPC_DEC) {

This logic seems off.

Do you mean to check only enc stats with an enc op ?

Similar for dec.

> + TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
> + "Mismatch in enqueue count %10"PRIu64" %d",
> + stats.enqueued_count, num_to_process);
> + TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
> + "Mismatch in dequeue count %10"PRIu64" %d",
> + stats.dequeued_count, num_to_process);
> + }
> + TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
> + "Enqueue count Error %10"PRIu64"",
> + stats.enqueue_err_count);
> + TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
> + "Dequeue count Error (%10"PRIu64"",
> + stats.dequeue_err_count);
> +
>   return TEST_SUCCESS;
>  #endif
>  }



Re: [dpdk-dev] [PATCH v5 3/7] app/bbdev: include explicit HARQ preloading

2020-10-26 Thread Tom Rix


On 10/23/20 4:42 PM, Nicolas Chautru wrote:
> Run preloading explicitly for unit tests. Load each code block
> by reusing existing input op then restore for the actual test.
>
> Signed-off-by: Nicolas Chautru 
> Acked-by: Liu Tianjiao 
> ---
>  app/test-bbdev/main.h|  1 +
>  app/test-bbdev/test_bbdev_perf.c | 51 
> +---
>  2 files changed, 28 insertions(+), 24 deletions(-)
>
> diff --git a/app/test-bbdev/main.h b/app/test-bbdev/main.h
> index fb3dec8..dc10a50 100644
> --- a/app/test-bbdev/main.h
> +++ b/app/test-bbdev/main.h
> @@ -17,6 +17,7 @@
>  #define TEST_SKIPPED1
>  
>  #define MAX_BURST 512U
> +#define MAX_OPS 1024U

This #define is not consistently used.

ex/ see retrieve_harq_ddr, the old 1024 is still being used.

>  #define DEFAULT_BURST 32U
>  #define DEFAULT_OPS 64U
>  #define DEFAULT_ITER 6U
> diff --git a/app/test-bbdev/test_bbdev_perf.c 
> b/app/test-bbdev/test_bbdev_perf.c
> index b62848e..f30cbdb 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -2513,20 +2513,20 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   bool preload)
>  {
>   uint16_t j;
> - int ret;
> - uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
> - struct rte_bbdev_op_data save_hc_in, save_hc_out;
> - struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
> + int deq;
> + uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
> + struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
> + struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
>   uint32_t flags = ops[0]->ldpc_dec.op_flags;
>   bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
>   bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
>   bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
>   bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
>   bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
> - for (j = 0; j < n; ++j) {
> - if ((mem_in || hc_in) && preload) {
> - save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
> - save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
> + if ((mem_in || hc_in) && preload) {
> + for (j = 0; j < n; ++j) {
> + save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
> + save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
>   ops[j]->ldpc_dec.op_flags =
>   RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
>   RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

flags are usually handled with bit operators, not arithmetic.

this seems to be a general issue.

> @@ -2536,16 +2536,23 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   ops[j]->ldpc_dec.harq_combined_output.offset =
>   harq_offset;
>   ops[j]->ldpc_dec.harq_combined_input.offset = 0;
> - rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
> - &ops[j], 1);
> - ret = 0;
> - while (ret == 0)
> - ret = rte_bbdev_dequeue_ldpc_dec_ops(
> - dev_id, queue_id, &ops_deq[j], 1);
> + harq_offset += HARQ_INCR;
> + }
> + rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
Add check the return is 'n'
> + deq = 0;
> + while (deq != n)
> + deq += rte_bbdev_dequeue_ldpc_dec_ops(
> + dev_id, queue_id, &ops_deq[deq],
> + n - deq);

Add check the return >= 0

Tom

> + /* Restore the operations */
> + for (j = 0; j < n; ++j) {
>   ops[j]->ldpc_dec.op_flags = flags;
> - ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
> - ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
> + ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
> + ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
>   }
> + }
> + harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
> + for (j = 0; j < n; ++j) {
>   /* Adjust HARQ offset when we reach external DDR */
>   if (mem_in || hc_in)
>   ops[j]->ldpc_dec.harq_combined_input.offset
> @@ -3231,11 +3238,9 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   mbuf_reset(
>   ops_enq[j]->ldpc_dec.harq_combined_output.data);
>   }
> - if (extDdr) {
> - bool preload = i == (TEST_REPETITIONS - 1);
>

Re: [dpdk-dev] [PATCH v5 4/7] app/bbdev: define wait for offload

2020-10-26 Thread Tom Rix


On 10/23/20 4:42 PM, Nicolas Chautru wrote:
> Replacing magic number for default wait time for hw
> offload.
>
> Signed-off-by: Nicolas Chautru 
> Acked-by: Liu Tianjiao 
> ---
>  app/test-bbdev/test_bbdev_perf.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/app/test-bbdev/test_bbdev_perf.c 
> b/app/test-bbdev/test_bbdev_perf.c
> index f30cbdb..39f06db 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -25,6 +25,7 @@
>  
>  #define MAX_QUEUES RTE_MAX_LCORE
>  #define TEST_REPETITIONS 1000
> +#define WAIT_OFFLOAD_US 1000

Why wasn't 200 used ?

Tom

>  
>  #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
>  #include 
> @@ -4451,7 +4452,7 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   time_st->enq_acc_total_time += stats.acc_offload_cycles;
>  
>   /* give time for device to process ops */
> - rte_delay_us(200);
> + rte_delay_us(WAIT_OFFLOAD_US);
>  
>   /* Start time meas for dequeue function offload latency */
>   deq_start_time = rte_rdtsc_precise();
> @@ -4542,7 +4543,7 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   time_st->enq_acc_total_time += stats.acc_offload_cycles;
>  
>   /* give time for device to process ops */
> - rte_delay_us(200);
> + rte_delay_us(WAIT_OFFLOAD_US);
>  
>   /* Start time meas for dequeue function offload latency */
>   deq_start_time = rte_rdtsc_precise();
> @@ -4630,7 +4631,7 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   time_st->enq_acc_total_time += stats.acc_offload_cycles;
>  
>   /* give time for device to process ops */
> - rte_delay_us(200);
> + rte_delay_us(WAIT_OFFLOAD_US);
>  
>   /* Start time meas for dequeue function offload latency */
>   deq_start_time = rte_rdtsc_precise();
> @@ -4713,7 +4714,7 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   time_st->enq_acc_total_time += stats.acc_offload_cycles;
>  
>   /* give time for device to process ops */
> - rte_delay_us(200);
> + rte_delay_us(WAIT_OFFLOAD_US);
>  
>   /* Start time meas for dequeue function offload latency */
>   deq_start_time = rte_rdtsc_precise();



Re: [dpdk-dev] [PATCH v5 5/7] app/bbdev: skip bler ut when compression is used

2020-10-26 Thread Tom Rix


On 10/23/20 4:42 PM, Nicolas Chautru wrote:
> bler test results are not valid when LLR compression
> is used or for loopback scenarios. Skipping these.
>
> Signed-off-by: Nicolas Chautru 
> Acked-by: Aidan Goddard 
> Acked-by: Dave Burley 
> ---
>  app/test-bbdev/test_bbdev_perf.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/app/test-bbdev/test_bbdev_perf.c 
> b/app/test-bbdev/test_bbdev_perf.c
> index 39f06db..a15ea69 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -3719,7 +3719,11 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
>   RTE_CACHE_LINE_SIZE));
>  
> - if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
> + if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
> + !check_bit(test_vector.ldpc_dec.op_flags,
> + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
> + && !check_bit(test_vector.ldpc_dec.op_flags,
> + RTE_BBDEV_LDPC_LLR_COMPRESSION))
>   bler_function = bler_pmd_lcore_ldpc_dec;
>   else
>   return TEST_SKIPPED;

Looks ok.

Reviewed-by: Tom Rix 



Re: [dpdk-dev] [PATCH] mlx5: add ConnectX-7 and Bluefield-3 device IDs

2020-10-26 Thread Slava Ovsiienko
> -Original Message-
> From: Raslan Darawsheh 
> Sent: Monday, October 26, 2020 13:42
> To: dev@dpdk.org
> Cc: Matan Azrad ; Slava Ovsiienko
> ; Shy Shyman 
> Subject: [PATCH] mlx5: add ConnectX-7 and Bluefield-3 device IDs
> 
> This adds the ConnectX-7 and Bluefield-3 device ids to the list of supported
> Mellanox devices that run the MLX5 PMDs.
> The devices are still in the development stage.
> 
> Signed-off-by: Raslan Darawsheh 
> ---
Acked-by: Viacheslav Ovsiienko 



Re: [dpdk-dev] [PATCH v5 6/7] app/bbdev: reduce duration of throughput test

2020-10-26 Thread Tom Rix


On 10/23/20 4:43 PM, Nicolas Chautru wrote:
> Reducing number of repetitions from 1000 to 100
> to save time. Results are accurate enough with
> 100 loops.
>
> Signed-off-by: Nicolas Chautru 
> Acked-by: Liu Tianjiao 
> ---
>  app/test-bbdev/test_bbdev_perf.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/app/test-bbdev/test_bbdev_perf.c 
> b/app/test-bbdev/test_bbdev_perf.c
> index a15ea69..b5dc536 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -24,7 +24,7 @@
>  #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : 
> (socket_id))
>  
>  #define MAX_QUEUES RTE_MAX_LCORE
> -#define TEST_REPETITIONS 1000
> +#define TEST_REPETITIONS 100
>  #define WAIT_OFFLOAD_US 1000
>  
>  #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC

Looks ok

Reviewed-by: Tom Rix 



[dpdk-dev] [PATCH v1] sched: update profile oversubscribe config file

2020-10-26 Thread Savinay Dharmappa
update the profile_ov config file to support dynamic
update of subport rate.

Signed-off-by: Savinay Dharmappa 
---
 examples/qos_sched/profile_ov.cfg | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/examples/qos_sched/profile_ov.cfg 
b/examples/qos_sched/profile_ov.cfg
index ab509d28d7..14c89ae340 100644
--- a/examples/qos_sched/profile_ov.cfg
+++ b/examples/qos_sched/profile_ov.cfg
@@ -11,6 +11,9 @@ number of subports per port = 1
 number of pipes per subport = 32
 queue sizes = 64 64 64 64 64 64 64 64 64 64 64 64 64
 
+subport 0-8 = 0
+
+[subport profile 0]
 tb rate = 840   ; Bytes per second
 tb size = 10; Bytes
 
-- 
2.17.1



Re: [dpdk-dev] [PATCH v5 7/7] app/bbdev: update offload test to dequeue full ring

2020-10-26 Thread Tom Rix


On 10/23/20 4:43 PM, Nicolas Chautru wrote:
> update offload dequeue to retrieve the full ring to be
> agnostic of implementation.
>
> Signed-off-by: Nicolas Chautru 
> Acked-by: Aidan Goddard 
> Acked-by: Dave Burley 
> ---
>  app/test-bbdev/test_bbdev_perf.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/app/test-bbdev/test_bbdev_perf.c 
> b/app/test-bbdev/test_bbdev_perf.c
> index b5dc536..a6884c5 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -4463,8 +4463,8 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   /* Dequeue one operation */
This comment and similar need to change, not doing just 1 anymore
>   do {
>   deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
> - &ops_deq[deq], 1);
> - } while (unlikely(deq != 1));
> + &ops_deq[deq], enq);
> + } while (unlikely(deq == 0));

This check looks wrong, should likely be (deq != enq)

Similar below

Tom

>  
>   deq_last_time = rte_rdtsc_precise() - deq_start_time;
>   time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
> @@ -4554,8 +4554,8 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   /* Dequeue one operation */
>   do {
>   deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
> - &ops_deq[deq], 1);
> - } while (unlikely(deq != 1));
> + &ops_deq[deq], enq);
> + } while (unlikely(deq == 0));
>  
>   deq_last_time = rte_rdtsc_precise() - deq_start_time;
>   time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
> @@ -4642,8 +4642,8 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   /* Dequeue one operation */
>   do {
>   deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
> - &ops_deq[deq], 1);
> - } while (unlikely(deq != 1));
> + &ops_deq[deq], enq);
> + } while (unlikely(deq == 0));
>  
>   deq_last_time = rte_rdtsc_precise() - deq_start_time;
>   time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
> @@ -4725,8 +4725,8 @@ typedef int (test_case_function)(struct active_device 
> *ad,
>   /* Dequeue one operation */
>   do {
>   deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
> - &ops_deq[deq], 1);
> - } while (unlikely(deq != 1));
> + &ops_deq[deq], enq);
> + } while (unlikely(deq == 0));
>  
>   deq_last_time = rte_rdtsc_precise() - deq_start_time;
>   time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,



Re: [dpdk-dev] [PATCH v6 0/4] hash: integrate RCU QSBR

2020-10-26 Thread Dharmik Thakkar



> On Oct 24, 2020, at 4:09 AM, David Marchand  wrote:
> 
> On Thu, Oct 22, 2020 at 12:51 AM Dharmik Thakkar
>  wrote:
>> 
>> Integrate RCU QSBR to make it easier for the applications to use lock
>> free algorithm.
>> 
>> Resource reclamation implementation was split from the original
>> series, and has already been part of RCU library. Rework the series
>> to base hash integration on RCU reclamation APIs.
>> 
>> Refer 'Resource reclamation framework for DPDK' available at [1]
>> to understand various aspects of integrating RCU library
>> into other libraries.
>> 
>> [1] https://doc.dpdk.org/guides/prog_guide/rcu_lib.html
>> 
>> Introduce a new API rte_hash_rcu_qsbr_add for application to
>> register a RCU variable that hash library will use.
>> 
>> Functional tests and performance tests are added to cover the
>> integration with RCU.
> 
> Fixed some style issues/checkpatch warnings.
> The unit test code especially is not really checkpatch compliant,
> maybe worth a separate cleanup later.
> 
> Removed unrelated changes (like removing empty lines, fixing typos in
> unrelated parts).
> 
> Series applied, thanks.
> 
> 
> Little note: checkpatch now has a check on repeated words better than
> a script of mine:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/scripts/checkpatch.pl?id=1a3dcf2e6b35faa1176b9cd8200094fbce16ba19
> Probably worth it for people to update their checkpatch.pl.
> 

Thank you, David! Appreciate your help.

> 
> -- 
> David Marchand



Re: [dpdk-dev] [PATCH v14 1/8] fib: make lookup function type configurable

2020-10-26 Thread David Marchand
Hello Vladimir,

On Sun, Oct 25, 2020 at 7:08 PM Vladimir Medvedkin
 wrote:
> diff --git a/lib/librte_fib/rte_fib.h b/lib/librte_fib/rte_fib.h
> index 84ee774..2097ee5 100644
> --- a/lib/librte_fib/rte_fib.h
> +++ b/lib/librte_fib/rte_fib.h
> @@ -58,6 +58,21 @@ enum rte_fib_dir24_8_nh_sz {
> RTE_FIB_DIR24_8_8B
>  };
>
> +/** Type of lookup function implementation */
> +enum rte_fib_dir24_8_lookup_type {
> +   RTE_FIB_DIR24_8_SCALAR_MACRO,
> +   /**< Macro based lookup function */
> +   RTE_FIB_DIR24_8_SCALAR_INLINE,
> +   /**<
> +* Lookup implementation using inlined functions
> +* for different next hop sizes
> +*/
> +   RTE_FIB_DIR24_8_SCALAR_UNI
> +   /**<
> +* Unified lookup function for all next hop sizes
> +*/
> +};
> +

We can't have a generic function with a specific type.
Let's have a generic name, in the hope it will be extended later for other
fib implementations.
For the default behavior and selecting the "best" possible
implementation, we can introduce a RTE_FIB_LOOKUP_DEFAULT magic value
that would work with any fib type.

How about:

enum rte_fib_lookup_type {
  RTE_FIB_LOOKUP_DEFAULT,
  RTE_FIB_LOOKUP_DIR24_8_SCALAR_MACRO,
  RTE_FIB_LOOKUP_DIR24_8_SCALAR_INLINE,
  RTE_FIB_LOOKUP_DIR24_8_SCALAR_UNI,
  RTE_FIB_LOOKUP_DIR24_8_VECTOR_AVX512,
};


>  /** FIB configuration structure */
>  struct rte_fib_conf {
> enum rte_fib_type type; /**< Type of FIB struct */
> @@ -196,6 +211,23 @@ __rte_experimental
>  struct rte_rib *
>  rte_fib_get_rib(struct rte_fib *fib);
>
> +/**
> + * Set lookup function based on type
> + *
> + * @param fib
> + *   FIB object handle
> + * @param type
> + *   type of lookup function
> + *
> + * @return
> + *-EINVAL on failure
> + *0 on success
> + */
> +__rte_experimental
> +int
> +rte_fib_set_lookup_fn(struct rte_fib *fib,
> +   enum rte_fib_dir24_8_lookup_type type);
> +

_fn does not give much info, how about rte_fib_select_lookup ?


>  #ifdef __cplusplus
>  }
>  #endif


-- 
David Marchand



Re: [dpdk-dev] [PATCH V1 1/1] net/mlx5: support item type error message in flow Verbs

2020-10-26 Thread Slava Ovsiienko
> -Original Message-
> From: Li Zhang 
> Sent: Monday, September 28, 2020 9:56
> To: Dekel Peled ; Ori Kam ; Slava
> Ovsiienko ; Matan Azrad 
> Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon ;
> Raslan Darawsheh 
> Subject: [PATCH V1 1/1] net/mlx5: support item type error message in flow
> Verbs
> 
> Update the flow verbs error message to "item type X not supported", when it
> is not supported, instead of a generic error message "item not supported".
> 
> Signed-off-by: Li Zhang 
> ---
Acked-by: Viacheslav Ovsiienko 


Re: [dpdk-dev] [PATCH 01/15] examples: enclose DPDK includes with angle brackets

2020-10-26 Thread Andrew Rybchenko
On 10/26/20 8:20 AM, Thomas Monjalon wrote:
> In examples, DPDK header files are external,
> so they must be enclosed with angle brackets, not quotes.
> 
> Signed-off-by: Thomas Monjalon 

Acked-by: Andrew Rybchenko 


Re: [dpdk-dev] [PATCH 02/15] kni: move header file from EAL

2020-10-26 Thread Andrew Rybchenko
On 10/26/20 8:20 AM, Thomas Monjalon wrote:
> Since the kernel module is not part of EAL anymore,
> there is no need to have the common KNI header file in EAL.
> The file rte_kni_common.h is moved to librte_kni.
> 
> Signed-off-by: Thomas Monjalon 

Acked-by: Andrew Rybchenko 



Re: [dpdk-dev] [PATCH 03/15] mbuf: fix typo in dynamic field convention note

2020-10-26 Thread Andrew Rybchenko
On 10/26/20 8:20 AM, Thomas Monjalon wrote:
> Replace "in a in PMD" with "in a PMD".
> 
> Fixes: 4958ca3a443a ("mbuf: support dynamic fields and flags")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Thomas Monjalon 

Acked-by: Andrew Rybchenko 


Re: [dpdk-dev] [PATCH 04/15] node: switch IPv4 metadata to dynamic mbuf field

2020-10-26 Thread Thomas Monjalon
26/10/2020 11:40, David Marchand:
> On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  wrote:
> > +   node_mbuf_priv1_dynfield_offset = 
> > rte_mbuf_dynfield_register(
> > +   &node_mbuf_priv1_dynfield_desc);
> > +   if (node_mbuf_priv1_dynfield_offset < 0)
> > +   return -1;
> 
> How about return -rte_errno like what is done in setup_lpm()?

Yes, looks better

[...]
> > +static const struct rte_mbuf_dynfield node_mbuf_priv1_dynfield_desc = {
> > +   .name = "rte_node_dynfield_priv1",
> > +   .size = sizeof(uint64_t),
> > +   .align = __alignof__(uint64_t),
> 
> s/uint64_t/struct node_mbuf_priv1/g ?

Yes will do




Re: [dpdk-dev] [PATCH 05/15] security: switch metadata to dynamic mbuf field

2020-10-26 Thread Thomas Monjalon
26/10/2020 11:41, David Marchand:
> On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  wrote:
> > +/* Dynamic mbuf field for device-specific metadata */
> > +static const struct rte_mbuf_dynfield rte_security_dynfield_desc = {
> > +   .name = RTE_SECURITY_DYNFIELD_NAME,
> > +   .size = sizeof(RTE_SECURITY_DYNFIELD_TYPE),
> > +   .align = __alignof__(RTE_SECURITY_DYNFIELD_TYPE),
> > +};
> 
> Should be in rte_security.c?

Yes,
and the inline function below can be a simple macro.

> > +extern int rte_security_dynfield_offset;
> > +
> > +__rte_experimental
> > +int rte_security_dynfield_register(void);
> > +
> > +static inline RTE_SECURITY_DYNFIELD_TYPE *
> > +rte_security_dynfield(struct rte_mbuf *mbuf)
> > +{
> > +   return RTE_MBUF_DYNFIELD(mbuf,
> > +   rte_security_dynfield_offset, RTE_SECURITY_DYNFIELD_TYPE *);
> > +}





[dpdk-dev] [PATCH] Add build option to enable/disable AVX2 support

2020-10-26 Thread Felix Moessbauer
This patch introduces a meson option to disable the AVX2 support.
If the build should be for a target without AVX2 support, the
knob can be turned to false, even if the compiler supports AVX2.

Signed-off-by: Felix Moessbauer 
---
 meson.build   | 3 ++-
 meson_options.txt | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 0ee98be..2e2380c 100644
--- a/meson.build
+++ b/meson.build
@@ -18,7 +18,8 @@ pktgen_conf = configuration_data()
 cc = meson.get_compiler('c')
 
 add_project_arguments('-march=native', language: 'c')
-if cc.has_argument('-mavx2')
+
+if get_option('enable-avx2') and cc.has_argument('-mavx2')
add_project_arguments('-mavx2', language: 'c')
 endif
 add_project_arguments('-DALLOW_EXPERIMENTAL_API', language: 'c')
diff --git a/meson_options.txt b/meson_options.txt
index 7b42577..c419517 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -1,3 +1,4 @@
 option('enable_lua', type: 'boolean', value: false, description: 'Enable Lua 
support')
 option('enable_gui', type: 'boolean', value: false, description: 'build the 
gui')
 option('enable_docs', type: 'boolean', value: false, description: 'build 
documentation')
+option('enable-avx2', type: 'boolean', value: true, description: 'compile with 
AVX2 support')
-- 
2.20.1



Re: [dpdk-dev] [PATCH 08/15] net/bnxt: switch CFA code to dynamic mbuf field

2020-10-26 Thread Thomas Monjalon
26/10/2020 11:42, David Marchand:
> On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  wrote:
> >
> > The CFA code from mark was stored in the deprecated mbuf field udata64.
> > It is moved to a dynamic field in order to allow removal of udata64.
> 
> We convert from a 64 bits (with only upper 32 bits filled) to a 32 bits field.
> Worth a comment on the API change for users (if any).

Yes
There is no good place to note this change except in the commit log.




Re: [dpdk-dev] [PATCH 14/15] examples/rxtx_callbacks: switch to dynamic mbuf field

2020-10-26 Thread Thomas Monjalon
26/10/2020 11:43, David Marchand:
> On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  wrote:
> >
> > The example used the deprecated mbuf field udata64.
> > It is moved to a dynamic field in order to allow removal of udata64.
> >
> > Note: RTE_MBUF_DYNFIELD_TIMESTAMP_NAME is an existing mbuf field name.
> 
> I am a bit lost.
> How is this going to work as the mbuf timestamp field is used in this
> example too?

Oh, you're right!
I will change the naming scheme to a custom "TSC" field.




Re: [dpdk-dev] [PATCH 04/15] node: switch IPv4 metadata to dynamic mbuf field

2020-10-26 Thread Andrew Rybchenko
On 10/26/20 5:29 PM, Thomas Monjalon wrote:
> 26/10/2020 11:40, David Marchand:
>> On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  wrote:
>>> +   node_mbuf_priv1_dynfield_offset = 
>>> rte_mbuf_dynfield_register(
>>> +   &node_mbuf_priv1_dynfield_desc);
>>> +   if (node_mbuf_priv1_dynfield_offset < 0)
>>> +   return -1;
>>
>> How about return -rte_errno like what is done in setup_lpm()?
> 
> Yes, looks better
> 
> [...]
>>> +static const struct rte_mbuf_dynfield node_mbuf_priv1_dynfield_desc = {
>>> +   .name = "rte_node_dynfield_priv1",
>>> +   .size = sizeof(uint64_t),
>>> +   .align = __alignof__(uint64_t),
>>
>> s/uint64_t/struct node_mbuf_priv1/g ?
> 
> Yes will do
> 

Just to be sure - "struct node_mbuf_priv1 *"


Re: [dpdk-dev] [PATCH 04/15] node: switch IPv4 metadata to dynamic mbuf field

2020-10-26 Thread Thomas Monjalon
26/10/2020 15:34, Andrew Rybchenko:
> On 10/26/20 5:29 PM, Thomas Monjalon wrote:
> > 26/10/2020 11:40, David Marchand:
> >> On Mon, Oct 26, 2020 at 6:21 AM Thomas Monjalon  
> >> wrote:
> >>> +   node_mbuf_priv1_dynfield_offset = 
> >>> rte_mbuf_dynfield_register(
> >>> +   &node_mbuf_priv1_dynfield_desc);
> >>> +   if (node_mbuf_priv1_dynfield_offset < 0)
> >>> +   return -1;
> >>
> >> How about return -rte_errno like what is done in setup_lpm()?
> > 
> > Yes, looks better
> > 
> > [...]
> >>> +static const struct rte_mbuf_dynfield node_mbuf_priv1_dynfield_desc = {
> >>> +   .name = "rte_node_dynfield_priv1",
> >>> +   .size = sizeof(uint64_t),
> >>> +   .align = __alignof__(uint64_t),
> >>
> >> s/uint64_t/struct node_mbuf_priv1/g ?
> > 
> > Yes will do
> > 
> 
> Just to be sure - "struct node_mbuf_priv1 *"

Yes, pointer to node_mbuf_priv1.




  1   2   3   >