RE: [PATCH v3 1/6] ethdev: add L2TPv2 RSS offload type

2022-01-30 Thread Ori Kam
Hi Jie,

> -Original Message-
> From: Jie Wang 
> Sent: Saturday, January 29, 2022 8:24 AM
> Subject: [PATCH v3 1/6] ethdev: add L2TPv2 RSS offload type
> 
> This patch defines new RSS offload type for L2TPv2, which
> is required when users want to distribute packets based on
> the L2TPv2 session ID field.
> 
> Signed-off-by: Jie Wang 
> ---
>  app/test-pmd/cmdline.c | 10 ++
>  app/test-pmd/config.c  |  3 ++-
>  doc/guides/rel_notes/release_22_03.rst |  5 +
>  lib/ethdev/rte_ethdev.h|  7 +++
>  4 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index e626b1c7d9..d535311f21 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -2178,7 +2178,7 @@ cmd_config_rss_parsed(void *parsed_result,
>   RTE_ETH_RSS_TCP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_SCTP |
>   RTE_ETH_RSS_L2_PAYLOAD | RTE_ETH_RSS_L2TPV3 | 
> RTE_ETH_RSS_ESP |
>   RTE_ETH_RSS_AH | RTE_ETH_RSS_PFCP | RTE_ETH_RSS_GTPU |
> - RTE_ETH_RSS_ECPRI;
> + RTE_ETH_RSS_ECPRI | RTE_ETH_RSS_L2TPV2;
>   else if (!strcmp(res->value, "eth"))
>   rss_conf.rss_hf = RTE_ETH_RSS_ETH;
>   else if (!strcmp(res->value, "vlan"))
> @@ -2256,6 +2256,8 @@ cmd_config_rss_parsed(void *parsed_result,
>   rss_conf.rss_hf = (rss_hf | RTE_ETH_RSS_LEVEL_INNERMOST);
>   } else if (!strcmp(res->value, "default"))
>   use_default = 1;
> + else if (!strcmp(res->value, "l2tpv2"))
> + rss_conf.rss_hf = RTE_ETH_RSS_L2TPV2;
>   else if (isdigit(res->value[0]) && atoi(res->value) > 0 &&
>   atoi(res->value) < 64)
>   rss_conf.rss_hf = 1ULL << atoi(res->value);
> @@ -2314,7 +2316,7 @@ cmdline_parse_inst_t cmd_config_rss = {
>   .help_str = "port config all rss "
>   "all|default|eth|vlan|ip|tcp|udp|sctp|ether|port|vxlan|geneve|"
>   
> "nvgre|vxlan-gpe|l2tpv3|esp|ah|pfcp|ecpri|mpls|none|level-default|"
> - "level-outer|level-inner|ipv4-chksum|",
> + "level-outer|level-inner|ipv4-chksum|l2tpv2|",
>   .tokens = {
>   (void *)&cmd_config_rss_port,
>   (void *)&cmd_config_rss_keyword,
> @@ -2429,7 +2431,7 @@ cmdline_parse_token_string_t 
> cmd_config_rss_hash_key_rss_type =
>"ipv6-tcp-ex#ipv6-udp-ex#"
>
> "l3-src-only#l3-dst-only#l4-src-only#l4-dst-only#"
>"l2-src-only#l2-dst-only#s-vlan#c-vlan#"
> -  "l2tpv3#esp#ah#pfcp#pppoe#gtpu#ecpri#mpls");
> +  
> "l2tpv3#esp#ah#pfcp#pppoe#gtpu#ecpri#mpls#l2tpv2");
>  cmdline_parse_token_string_t cmd_config_rss_hash_key_value =
>   TOKEN_STRING_INITIALIZER(struct cmd_config_rss_hash_key, key, NULL);
> 
> @@ -2442,7 +2444,7 @@ cmdline_parse_inst_t cmd_config_rss_hash_key = {
>   "l2-payload|ipv6-ex|ipv6-tcp-ex|ipv6-udp-ex|"
>   "l3-src-only|l3-dst-only|l4-src-only|l4-dst-only|"
>   "l2-src-only|l2-dst-only|s-vlan|c-vlan|"
> - "l2tpv3|esp|ah|pfcp|pppoe|gtpu|ecpri|mpls "
> + "l2tpv3|esp|ah|pfcp|pppoe|gtpu|ecpri|mpls|l2tpv2 "
>   "",
>   .tokens = {
>   (void *)&cmd_config_rss_hash_key_port,
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index 1722d6c8f8..ec922bd304 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -90,7 +90,7 @@ const struct rss_type_info rss_type_table[] = {
>   { "all", RTE_ETH_RSS_ETH | RTE_ETH_RSS_VLAN | RTE_ETH_RSS_IP | 
> RTE_ETH_RSS_TCP |
>   RTE_ETH_RSS_UDP | RTE_ETH_RSS_SCTP | RTE_ETH_RSS_L2_PAYLOAD |
>   RTE_ETH_RSS_L2TPV3 | RTE_ETH_RSS_ESP | RTE_ETH_RSS_AH | 
> RTE_ETH_RSS_PFCP
> |
> - RTE_ETH_RSS_GTPU | RTE_ETH_RSS_ECPRI | RTE_ETH_RSS_MPLS},
> + RTE_ETH_RSS_GTPU | RTE_ETH_RSS_ECPRI | RTE_ETH_RSS_MPLS |
> RTE_ETH_RSS_L2TPV2},
>   { "none", 0 },
>   { "eth", RTE_ETH_RSS_ETH },
>   { "l2-src-only", RTE_ETH_RSS_L2_SRC_ONLY },
> @@ -143,6 +143,7 @@ const struct rss_type_info rss_type_table[] = {
>   { "mpls", RTE_ETH_RSS_MPLS },
>   { "ipv4-chksum", RTE_ETH_RSS_IPV4_CHKSUM },
>   { "l4-chksum", RTE_ETH_RSS_L4_CHKSUM },
> + { "l2tpv2", RTE_ETH_RSS_L2TPV2 },
>   { NULL, 0 },
>  };
> 
> diff --git a/doc/guides/rel_notes/release_22_03.rst 
> b/doc/guides/rel_notes/release_22_03.rst
> index 33be3241b9..9a507ab9ea 100644
> --- a/doc/guides/rel_notes/release_22_03.rst
> +++ b/doc/guides/rel_notes/release_22_03.rst
> @@ -55,6 +55,11 @@ New Features
>   Also, make sure to start the actual text at the margin.
>   ===
> 
> +* **Added new RSS offload types for L2TPv2 in RSS flow.**

RE: [PATCH v3 2/6] net: fix L2TPv2 common header

2022-01-30 Thread Ori Kam



> -Original Message-
> From: Jie Wang 
> Sent: Saturday, January 29, 2022 8:25 AM
> To: dev@dpdk.org
> Cc: stevex.y...@intel.com; Ori Kam ; 
> aman.deep.si...@intel.com;
> ferruh.yi...@intel.com; NBU-Contact-Thomas Monjalon (EXTERNAL) 
> ;
> andrew.rybche...@oktetlabs.ru; jingjing...@intel.com; beilei.x...@intel.com;
> qi.z.zh...@intel.com; olivier.m...@6wind.com; Jie Wang 
> ; sta...@dpdk.org
> Subject: [PATCH v3 2/6] net: fix L2TPv2 common header
> 
> The fields of L2TPv2 common header were reversed in big endian and
> little endian.
> 
> This patch fixes this error to ensure L2TPv2 can be parsed correctly.
> 
> Fixes: 3a929df1f286 ("ethdev: support L2TPv2 and PPP procotol")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Jie Wang 
> ---
>  lib/net/rte_l2tpv2.h | 20 ++--
>  1 file changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/net/rte_l2tpv2.h b/lib/net/rte_l2tpv2.h
> index 938a993b48..1f3ad3f03c 100644
> --- a/lib/net/rte_l2tpv2.h
> +++ b/lib/net/rte_l2tpv2.h
> @@ -89,16 +89,6 @@ struct rte_l2tpv2_common_hdr {
>   __extension__
>   struct {
>  #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> - uint16_t t:1;   /**< message Type */
> - uint16_t l:1;   /**< length option bit */
> - uint16_t res1:2;/**< reserved */
> - uint16_t s:1;   /**< ns/nr option bit */
> - uint16_t res2:1;/**< reserved */
> - uint16_t o:1;   /**< offset option bit */
> - uint16_t p:1;   /**< priority option bit */
> - uint16_t res3:4;/**< reserved */
> - uint16_t ver:4; /**< protocol version */
> -#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
>   uint16_t ver:4; /**< protocol version */
>   uint16_t res3:4;/**< reserved */
>   uint16_t p:1;   /**< priority option bit */
> @@ -108,6 +98,16 @@ struct rte_l2tpv2_common_hdr {
>   uint16_t res1:2;/**< reserved */
>   uint16_t l:1;   /**< length option bit */
>   uint16_t t:1;   /**< message Type */
> +#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> + uint16_t t:1;   /**< message Type */
> + uint16_t l:1;   /**< length option bit */
> + uint16_t res1:2;/**< reserved */
> + uint16_t s:1;   /**< ns/nr option bit */
> + uint16_t res2:1;/**< reserved */
> + uint16_t o:1;   /**< offset option bit */
> + uint16_t p:1;   /**< priority option bit */
> + uint16_t res3:4;/**< reserved */
> + uint16_t ver:4; /**< protocol version */
>  #endif
>   };
>   };
> --
> 2.25.1

Acked-by: Ori Kam 
Best,
Ori



RE: [PATCH v3 3/6] app/testpmd: add 6 types of L2TPv2 message

2022-01-30 Thread Ori Kam



> -Original Message-
> From: Jie Wang 
> Sent: Saturday, January 29, 2022 8:25 AM
> Subject: [PATCH v3 3/6] app/testpmd: add 6 types of L2TPv2 message
> 
> This patch adds L2TPv2 control message and 5 types of data message
> support for testpmd.
> 
> The added L2TPv2 message types are listed below:
> 1. L2TPv2 control
> 2. L2TPv2
> 3. L2TPv2 + length option
> 4. L2TPv2 + sequence option
> 5. L2TPv2 + offset option
> 6. L2TPv2 + length option + sequence option
> 
> Signed-off-by: Jie Wang 
> ---


Acked-by: Ori Kam 
Best,
Ori


RE: [PATCH] app/testpmd: fix GENEVE parsing in csum forward mode

2022-01-30 Thread Raja Zidane
I didn't want to remove the default parsing of tunnel as VxLan because I 
thought it might be used,
Instead I moved it to the end, which makes it detect all supported tunnel 
through udp_dst_port,
And only if no tunnel was matched it would default to VxLan.
That was the reason geneve weren't detected and parsed as vxlan instead, which 
is the bug I was trying to solve.

-Original Message-
From: Singh, Aman Deep  
Sent: Thursday, January 20, 2022 12:47 PM
To: Matan Azrad ; Ferruh Yigit ; Raja 
Zidane ; dev@dpdk.org
Cc: sta...@dpdk.org
Subject: Re: [PATCH] app/testpmd: fix GENEVE parsing in csum forward mode

External email: Use caution opening links or attachments


On 1/18/2022 6:49 PM, Matan Azrad wrote:
>
>> -Original Message-
>> From: Ferruh Yigit 
>> Sent: Tuesday, January 18, 2022 3:03 PM
>> To: Matan Azrad ; Raja Zidane ; 
>> dev@dpdk.org
>> Cc: sta...@dpdk.org
>> Subject: Re: [PATCH] app/testpmd: fix GENEVE parsing in csum forward 
>> mode
>>
>> External email: Use caution opening links or attachments
>>
>>
>> On 1/18/2022 12:55 PM, Matan Azrad wrote:
>>>
 -Original Message-
 From: Ferruh Yigit 
 Sent: Tuesday, January 18, 2022 2:28 PM
 To: Matan Azrad ; Raja Zidane 
 ; dev@dpdk.org
 Cc: sta...@dpdk.org
 Subject: Re: [PATCH] app/testpmd: fix GENEVE parsing in csum 
 forward mode

 External email: Use caution opening links or attachments


 On 1/18/2022 11:27 AM, Matan Azrad wrote:
>
>> -Original Message-
>> From: Ferruh Yigit 
>> Sent: Tuesday, January 18, 2022 11:52 AM
>> To: Raja Zidane ; dev@dpdk.org
>> Cc: Matan Azrad ; sta...@dpdk.org
>> Subject: Re: [PATCH] app/testpmd: fix GENEVE parsing in csum 
>> forward mode
>>
>> External email: Use caution opening links or attachments
>>
>>
>> On 12/5/2021 3:44 AM, Raja Zidane wrote:
>>> The csum FWD mode parses any received packet to set mbuf 
>>> offloads for the transmitting burst, mainly in the checksum/TSO areas.
>>> In the case of a tunnel header, the csum FWD tries to detect 
>>> known tunnels by the standard definition using the header'sdata 
>>> and fallback to check the packet type in the mbuf to see if the 
>>> Rx port driver already sign the packet as a tunnel.
>>> In the fallback case, the csum assumes the tunnel is VXLAN and 
>>> parses the tunnel as VXLAN.
>> As far as I can see there is a VXLAN port check in 
>> 'parse_vxlan()', why it is not helping?
>>
> The problem is not the vxlan check but the tunnel type in mbuf 
> that caused the
 packet to be detected as vxlan(default) before checking GENEVE tunnel case.
 Check is as following:

if (udp_hdr->dst_port != _htons(RTE_VXLAN_DEFAULT_PORT) &&
RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
return;

 Do you what is the intention for the 
 "RTE_ETH_IS_TUNNEL_PKT(pkt_type) ==
>> 0"
 check?
 Why vxlan parsing doesn't stop when it is not default port?
>>> Maybe some drivers set the tunnel type for vxlan packets coming 
>>> after non-
>> standard vxlan port.
>> But checking the tunnel flag to say that it is vxlan is too broad, isn't it?
>> And this is the problem you are having.
>>
>> Can there be any way to detect and check non-standard vxlan port?
> Maybe yes, but it is probably more complex solusion.
>
> See this patch:
> https://patches.dpdk.org/project/dpdk/patch/1423819371-24222-13-git-se
> nd-email-olivier.m...@6wind.com/
>
> comments there:
> 1. check udp destination port, 4789 is the default vxlan port (rfc7348).
> 2. currently, this flag is set by i40e only if the packet is vxlan
>
> And maybe another driver assumes more ports here.
>
> Collecting all the non-standard ports can start from i40 maintainers😊
I think, here we should check for supported tunnel types only.  The i40 driver 
setting a Tunnel flag, does not means that it VxLan type only.
As in your case those were GENEVE packets getting treated as VxLan.

Making parse_vxlan() udp_dst_port specific can avoid this issue.

>>> When the GENEVE tunnel was added to the known tunnels in csum, 
>>> its parsing trial was wrongly located after the pkt type 
>>> detection, causing the csum to parse the GENEVE header as VXLAN 
>>> when the Rx port set the tunnel packet type.
>>>
>>> Locate the GENEVE parsing trial before the packet type detection.
>>>
>>> Fixes: ea0e711b8ae0 ("app/testpmd: add GENEVE parsing")
>>> Cc: sta...@dpdk.org
>>>
>>> Signed-off-by: Raja Zidane 
>>> ---
>>> Acked-by: Matan Azrad 
>> Ack should be before '---' to be part of the commit log, 
>> otherwise it is dropped when applied as comment.
>>
>>>  app/test-pmd/csumonly.c | 16 ++--
>>>  1 file changed, 10 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/c

RE: [EXT] Re: [PATCH v2 2/4] ethdev: add dev op to set/get IP reassembly configuration

2022-01-30 Thread Akhil Goyal
Hi Andrew,

> On 1/20/22 19:26, Akhil Goyal wrote:
> > A new ethernet device op is added to give application control over
> 
> ethernet -> Ethernet

Ok
> 
> > the IP reassembly configuration. This operation is an optional
> > call from the application, default values are set by PMD and
> > exposed via rte_eth_dev_info.
> 
> Are defaults or maximum support values exposed via rte_eth_dev_info?
> I guess it should be maximum. Defaults can be obtained using
> get without set.
> 

Rte_eth_dev_info gives the maximum values/capabilities that a device can support
And also the default values set if user does not call set() API.

And get() op will give the currently set values.

> > Application should always first retrieve the capabilities from
> > rte_eth_dev_info and then set the fields accordingly.
> > User can get the currently set values using the get API.
> >
> > Signed-off-by: Akhil Goyal 
> 
> [snip]
> 
> 
> > +/**
> > + * @internal
> > + * Set configuration parameters for enabling IP reassembly offload in
> hardware.
> > + *
> > + * @param dev
> > + *   Port (ethdev) handle
> > + *
> > + * @param[in] conf
> > + *   Configuration parameters for IP reassembly.
> > + *
> > + * @return
> > + *   Negative errno value on error, zero otherwise
> > + */
> > +typedef int (*eth_ip_reassembly_conf_set_t)(struct rte_eth_dev *dev,
> > +  struct rte_eth_ip_reass_params *conf);
> 
> const
> 
> [snip]
> 
> > +int
> > +rte_eth_ip_reassembly_conf_get(uint16_t port_id,
> > +  struct rte_eth_ip_reass_params *conf)
> 
> Please, preserve order everywhere. If get comes first, it must be first
> everywhere.
ok
> 
> > +{
> > +   struct rte_eth_dev *dev;
> > +
> > +   RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> > +   dev = &rte_eth_devices[port_id];
> > +
> > +   if (conf == NULL) {
> > +   RTE_ETHDEV_LOG(ERR, "Cannot get reassembly info to NULL");
> > +   return -EINVAL;
> > +   }
> 
> Why is order of check different in set and get?
Ok will correct it.
> 
> > +
> > +   if (dev->data->dev_configured == 0) {
> > +   RTE_ETHDEV_LOG(ERR,
> > +   "Device with port_id=%"PRIu16" is not configured.\n",
> > +   port_id);
> > +   return -EINVAL;
> > +   }
> > +
> > +   if ((dev->data->dev_conf.rxmode.offloads &
> > +   RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) == 0) {
> > +   RTE_ETHDEV_LOG(ERR,
> > +   "The port (ID=%"PRIu16") is not configured for IP
> reassembly\n",
> > +   port_id);
> > +   return -EINVAL;
> > +   }
> > +
> > +   RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >ip_reassembly_conf_get,
> > +   -ENOTSUP);
> > +   memset(conf, 0, sizeof(struct rte_eth_ip_reass_params));
> > +   return eth_err(port_id,
> > +  (*dev->dev_ops->ip_reassembly_conf_get)(dev, conf));
> > +}
> > +
> >   RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
> >
> >   RTE_INIT(ethdev_init_telemetry)
> > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> > index 11427b2e4d..53af158bcb 100644
> > --- a/lib/ethdev/rte_ethdev.h
> > +++ b/lib/ethdev/rte_ethdev.h
> > @@ -5218,6 +5218,57 @@ int rte_eth_representor_info_get(uint16_t port_id,
> >   __rte_experimental
> >   int rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t *features);
> >
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change without prior notice
> > + *
> > + * Get IP reassembly configuration parameters currently set in PMD,
> > + * if device rx offload flag (RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) is
> 
> rx -> Rx
> 
> > + * enabled and the PMD supports IP reassembly offload.
> > + *
> > + * @param port_id
> > + *   The port identifier of the device.
> > + * @param conf
> > + *   A pointer to rte_eth_ip_reass_params structure.
> > + * @return
> > + *   - (-ENOTSUP) if offload configuration is not supported by device.
> > + *   - (-EINVAL) if offload is not enabled in rte_eth_conf.
> > + *   - (-ENODEV) if *port_id* invalid.
> > + *   - (-EIO) if device is removed.
> > + *   - (0) on success.
> > + */
> > +__rte_experimental
> > +int rte_eth_ip_reassembly_conf_get(uint16_t port_id,
> > +  struct rte_eth_ip_reass_params *conf);
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change without prior notice
> > + *
> > + * Set IP reassembly configuration parameters if device rx offload
> 
> rx -> Rx
> 
Ok

> > + * flag (RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) is enabled and the PMD
> > + * supports IP reassembly offload. User should first check the
> > + * reass_capa in rte_eth_dev_info before setting the configuration.
> > + * The values of configuration parameters must not exceed the device
> > + * capabilities.
> 
> It sounds like set API should retrieve dev_info and check set
> values vs maximums.

Yes.

> 
> > The use of this API is optional and if called, it
> > + * should be called before rte_eth_dev_

RE: [EXT] Re: [PATCH 1/8] ethdev: introduce IP reassembly offload

2022-01-30 Thread Akhil Goyal
Hi Andrew,
Thanks for the review.
> On 1/3/22 18:08, Akhil Goyal wrote:
> > IP Reassembly is a costly operation if it is done in software.
> > The operation becomes even more costlier if IP fragmants are encrypted.
> > However, if it is offloaded to HW, it can considerably save application 
> > cycles.
> >
> > Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced
> in
> > ethdev for devices which can attempt reassembly of packets in hardware.
> > rte_eth_dev_info is updated with the reassembly capabilities which a device
> > can support.
> 
> Yes, reassembly is really complicated process taking possibility to have
> overlapping fragments, out-of-order etc.
> There are network attacks based on IP reassembly.
> Will it simply result in IP reassembly failure if no buffers are left
> for IP fragments? What will be reported in mbuf if some packets overlap?
> Just raw packets as is or reassembly result with holes?
> I think behavour should be specified/

The PMD will set the reassembly incomplete dynflag and the user can retrieve the
Mbufs using dynfields. This is shown in v2 of the patchset and a test app is 
also added
To cover this negative case.

> 
> > The resulting reassembled packet would be a typical segmented mbuf in
> > case of success.
> >
> > And if reassembly of fragments is failed or is incomplete (if fragments do
> > not come before the reass_timeout), the mbuf ol_flags can be updated.
> > This is updated in a subsequent patch.
> >
> > Signed-off-by: Akhil Goyal 
> > ---
> >   doc/guides/nics/features.rst | 12 
> >   lib/ethdev/rte_ethdev.c  |  1 +
> >   lib/ethdev/rte_ethdev.h  | 32 +++-
> >   3 files changed, 44 insertions(+), 1 deletion(-)
> >
> > diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
> > index 27be2d2576..1dfdee9602 100644
> > --- a/doc/guides/nics/features.rst
> > +++ b/doc/guides/nics/features.rst
> > @@ -602,6 +602,18 @@ Supports inner packet L4 checksum.
> >
> ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UD
> P_CKSUM``.
> >
> >
> > +.. _nic_features_ip_reassembly:
> > +
> > +IP reassembly
> > +-
> > +
> > +Supports IP reassembly in hardware.
> > +
> > +* **[uses] rte_eth_rxconf,rte_eth_rxmode**:
> ``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
> 
> Looking at the patch I see no changes and usage of rte_eth_rxconf and
> rte_eth_rxmode. It should be added here later if corresponding changes
> come in subsequent patches.
> 
> > +* **[provides] mbuf**:
> ``mbuf.ol_flags:RTE_MBUF_F_RX_IP_REASSEMBLY_INCOMPLETE``
> 
> Same here. The flag is not defined yet. So, it must not be mentioned in
> the patch.
> .
Ok

> > +* **[provides] rte_eth_dev_info**: ``reass_capa``.
> > +
> > +
> >   .. _nic_features_shared_rx_queue:
> >
> >   Shared Rx queue
> 
> 
> > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> > index fa299c8ad7..11427b2e4d 100644
> > --- a/lib/ethdev/rte_ethdev.h
> > +++ b/lib/ethdev/rte_ethdev.h
> 
> [snip]
> 
> > @@ -1781,6 +1782,33 @@ enum rte_eth_representor_type {
> > RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
> >   };
> >
> > +/* Flag to offload IP reassembly for IPv4 packets. */
> > +#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
> > +/* Flag to offload IP reassembly for IPv6 packets. */
> > +#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this structure may change without prior notice.
> > + *
> > + * A structure used to set IP reassembly configuration.
> 
> In the patch the structure is used to provide capabilities,
> not to set configuration.
> 
> If you are going to use the same structure in capabilities and
> configuration, it could be handy, but really confusing since
> interpretation of fields should be different.
> As a bare minimum the difference must be specified in comments.
> Right now all fields makes sense in capabilities and configuration:
> maximum possible vs actual value, however, not everything could be
> really configurable and it will become confusing. It is really hard
> to discuss right now since the patch does not provide usage of the
> structure for the configuration.

The idea is to use it both for capabilities as well as configuration of those 
values.
And from library perspective not creating any limitation about which param can
Be configured which cannot be configured.
However will add a comment to specify both usage.
Could you please check v2 and test app for the usage?

> 
> > + *
> > + * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
> > + * the PMD will attempt IP reassembly for the received packets as per
> > + * properties defined in this structure:
> > + *
> > + */
> > +struct rte_eth_ip_reass_params {
> > +   /** Maximum time in ms which PMD can wait for other fragments. */
> > +   uint32_t reass_timeout;
> 
> Please, specify units. May be even in field name. E.g. reass_timeout_ms.
Ok


> 
> 

[PATCH v3 0/4] ethdev: introduce IP reassembly offload

2022-01-30 Thread Akhil Goyal
As discussed in the RFC[1] sent in 21.11, a new offload is
introduced in ethdev for IP reassembly.

This patchset add the IP reassembly RX offload.
Currently, the offload is tested along with inline IPsec processing.
It can also be updated as a standalone offload without IPsec, if there
are some hardware available to test it.
The patchset is tested on cnxk platform. The driver implementation
and a test app are added as separate patchsets.

[1]: 
http://patches.dpdk.org/project/dpdk/patch/20210823100259.1619886-1-gak...@marvell.com/

changes in v3:
- incorporated comments from Andrew and Stephen Hemminger

changes in v2:
- added abi ignore exceptions for modifications in reserved fields.
  Added a crude way to subside the rte_security and rte_ipsec ABI issue.
  Please suggest a better way.
- incorporated Konstantin's comment for extra checks in new API
  introduced.
- converted static mbuf ol_flag to mbuf dynflag (Konstantin)
- added a get API for reassembly configuration (Konstantin)
- Fixed checkpatch issues.
- Dynfield is NOT split into 2 parts as it would cause an extra fetch in
  case of IP reassembly failure.
- Application patches are split into a separate series.


Akhil Goyal (4):
  ethdev: introduce IP reassembly offload
  ethdev: add dev op to set/get IP reassembly configuration
  ethdev: add mbuf dynfield for incomplete IP reassembly
  security: add IPsec option for IP reassembly

 devtools/libabigail.abignore |  19 ++
 doc/guides/nics/features.rst |  12 
 lib/ethdev/ethdev_driver.h   |  45 +++
 lib/ethdev/rte_ethdev.c  | 109 +++
 lib/ethdev/rte_ethdev.h  | 100 +++-
 lib/ethdev/version.map   |   5 ++
 lib/security/rte_security.h  |  12 +++-
 7 files changed, 300 insertions(+), 2 deletions(-)

-- 
2.25.1



[PATCH v3 1/4] ethdev: introduce IP reassembly offload

2022-01-30 Thread Akhil Goyal
IP Reassembly is a costly operation if it is done in software.
The operation becomes even more costlier if IP fragments are encrypted.
However, if it is offloaded to HW, it can considerably save application
cycles.

Hence, a new offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is introduced in
ethdev for devices which can attempt reassembly of packets in hardware.
rte_eth_dev_info is updated with the reassembly capabilities which a device
can support.

The resulting reassembled packet would be a typical segmented mbuf in
case of success.

And if reassembly of fragments is failed or is incomplete (if fragments do
not come before the reass_timeout), the mbuf ol_flags can be updated.
This is updated in a subsequent patch.

Signed-off-by: Akhil Goyal 
Acked-by: Konstantin Ananyev 
---
 devtools/libabigail.abignore |  5 +
 doc/guides/nics/features.rst | 11 +++
 lib/ethdev/rte_ethdev.c  |  1 +
 lib/ethdev/rte_ethdev.h  | 28 +++-
 4 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/devtools/libabigail.abignore b/devtools/libabigail.abignore
index 4b676f317d..90f449c43a 100644
--- a/devtools/libabigail.abignore
+++ b/devtools/libabigail.abignore
@@ -11,3 +11,8 @@
 ; Ignore generated PMD information strings
 [suppress_variable]
 name_regexp = _pmd_info$
+
+; Ignore fields inserted in place of reserved_64s of rte_eth_dev_info
+[suppress_type]
+   name = rte_eth_dev_info
+   has_data_member_inserted_between = {offset_of(reserved_64s), end}
diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
index 27be2d2576..b45bce4a78 100644
--- a/doc/guides/nics/features.rst
+++ b/doc/guides/nics/features.rst
@@ -602,6 +602,17 @@ Supports inner packet L4 checksum.
   ``tx_offload_capa,tx_queue_offload_capa:RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM``.
 
 
+.. _nic_features_ip_reassembly:
+
+IP reassembly
+-
+
+Supports IP reassembly in hardware.
+
+* **[uses] rte_eth_rxconf,rte_eth_rxmode**: 
``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
+* **[provides] rte_eth_dev_info**: ``reass_capa``.
+
+
 .. _nic_features_shared_rx_queue:
 
 Shared Rx queue
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index a1d475a292..d9a03f12f9 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -126,6 +126,7 @@ static const struct {
RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
RTE_RX_OFFLOAD_BIT2STR(RSS_HASH),
RTE_RX_OFFLOAD_BIT2STR(BUFFER_SPLIT),
+   RTE_RX_OFFLOAD_BIT2STR(IP_REASSEMBLY),
 };
 
 #undef RTE_RX_OFFLOAD_BIT2STR
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index fa299c8ad7..cfaf7a5afc 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1586,6 +1586,7 @@ struct rte_eth_conf {
 #define RTE_ETH_RX_OFFLOAD_RSS_HASH RTE_BIT64(19)
 #define DEV_RX_OFFLOAD_RSS_HASH RTE_ETH_RX_OFFLOAD_RSS_HASH
 #define RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT RTE_BIT64(20)
+#define RTE_ETH_RX_OFFLOAD_IP_REASSEMBLYRTE_BIT64(21)
 
 #define RTE_ETH_RX_OFFLOAD_CHECKSUM (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
@@ -1781,6 +1782,29 @@ enum rte_eth_representor_type {
RTE_ETH_REPRESENTOR_PF,   /**< representor of Physical Function. */
 };
 
+/* Flag to offload IP reassembly for IPv4 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV4 (RTE_BIT32(0))
+/* Flag to offload IP reassembly for IPv6 packets. */
+#define RTE_ETH_DEV_REASSEMBLY_F_IPV6 (RTE_BIT32(1))
+/**
+ * A structure used to get/set IP reassembly configuration.
+ *
+ * If RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY flag is set in offloads field,
+ * the PMD will attempt IP reassembly for the received packets as per
+ * properties defined in this structure.
+ */
+struct rte_eth_ip_reass_params {
+   /** Maximum time in ms which PMD can wait for other fragments. */
+   uint32_t reass_timeout_ms;
+   /** Maximum number of fragments that can be reassembled. */
+   uint16_t max_frags;
+   /**
+* Flags to enable reassembly of packet types -
+* RTE_ETH_DEV_REASSEMBLY_F_xxx.
+*/
+   uint16_t flags;
+};
+
 /**
  * A structure used to retrieve the contextual information of
  * an Ethernet device, such as the controlling driver of the
@@ -1841,8 +1865,10 @@ struct rte_eth_dev_info {
 * embedded managed interconnect/switch.
 */
struct rte_eth_switch_info switch_info;
+   /** IP reassembly offload capabilities that a device can support. */
+   struct rte_eth_ip_reass_params reass_capa;
 
-   uint64_t reserved_64s[2]; /**< Reserved for future fields */
+   uint64_t reserved_64s[1]; /**< Reserved for future fields */
void *reserved_ptrs[2];   /**< Reserved for future fields */
 };
 
-- 
2.25.1



[PATCH v3 2/4] ethdev: add dev op to set/get IP reassembly configuration

2022-01-30 Thread Akhil Goyal
A new Ethernet device op is added to give application control over
the IP reassembly configuration. This operation is an optional
call from the application, default/max values are set by PMD and
exposed via rte_eth_dev_info.
Application should always first retrieve the capabilities from
rte_eth_dev_info and then set the fields accordingly. The set
API should be called before the starting the device.

User can get the currently set values using the get API.

Signed-off-by: Akhil Goyal 
---
 doc/guides/nics/features.rst |  1 +
 lib/ethdev/ethdev_driver.h   | 37 +
 lib/ethdev/rte_ethdev.c  | 80 
 lib/ethdev/rte_ethdev.h  | 51 +++
 lib/ethdev/version.map   |  4 ++
 5 files changed, 173 insertions(+)

diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
index b45bce4a78..2a3cf09066 100644
--- a/doc/guides/nics/features.rst
+++ b/doc/guides/nics/features.rst
@@ -611,6 +611,7 @@ Supports IP reassembly in hardware.
 
 * **[uses] rte_eth_rxconf,rte_eth_rxmode**: 
``offloads:RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY``.
 * **[provides] rte_eth_dev_info**: ``reass_capa``.
+* **[provides] eth_dev_ops**: 
``ip_reassembly_conf_get:ip_reassembly_conf_set``.
 
 
 .. _nic_features_shared_rx_queue:
diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index d95605a355..a310001648 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -990,6 +990,38 @@ typedef int (*eth_representor_info_get_t)(struct 
rte_eth_dev *dev,
 typedef int (*eth_rx_metadata_negotiate_t)(struct rte_eth_dev *dev,
   uint64_t *features);
 
+/**
+ * @internal
+ * Get IP reassembly offload configuration parameters set in PMD.
+ *
+ * @param dev
+ *   Port (ethdev) handle
+ *
+ * @param[out] conf
+ *   Configuration parameters for IP reassembly.
+ *
+ * @return
+ *   Negative errno value on error, zero otherwise
+ */
+typedef int (*eth_ip_reassembly_conf_get_t)(struct rte_eth_dev *dev,
+  struct rte_eth_ip_reass_params *conf);
+
+/**
+ * @internal
+ * Set configuration parameters for enabling IP reassembly offload in hardware.
+ *
+ * @param dev
+ *   Port (ethdev) handle
+ *
+ * @param[in] conf
+ *   Configuration parameters for IP reassembly.
+ *
+ * @return
+ *   Negative errno value on error, zero otherwise
+ */
+typedef int (*eth_ip_reassembly_conf_set_t)(struct rte_eth_dev *dev,
+  struct rte_eth_ip_reass_params *conf);
+
 /**
  * @internal A structure containing the functions exported by an Ethernet 
driver.
  */
@@ -1186,6 +1218,11 @@ struct eth_dev_ops {
 * kinds of metadata to the PMD
 */
eth_rx_metadata_negotiate_t rx_metadata_negotiate;
+
+   /** Get IP reassembly configuration */
+   eth_ip_reassembly_conf_get_t ip_reassembly_conf_get;
+   /** Set IP reassembly configuration */
+   eth_ip_reassembly_conf_set_t ip_reassembly_conf_set;
 };
 
 /**
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index d9a03f12f9..6e9a8cf33b 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -6473,6 +6473,86 @@ rte_eth_rx_metadata_negotiate(uint16_t port_id, uint64_t 
*features)
   (*dev->dev_ops->rx_metadata_negotiate)(dev, features));
 }
 
+int
+rte_eth_ip_reassembly_conf_get(uint16_t port_id,
+  struct rte_eth_ip_reass_params *conf)
+{
+   struct rte_eth_dev *dev;
+
+   RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+   dev = &rte_eth_devices[port_id];
+
+   if (dev->data->dev_configured == 0) {
+   RTE_ETHDEV_LOG(ERR,
+   "Device with port_id=%"PRIu16" is not configured.\n",
+   port_id);
+   return -EINVAL;
+   }
+
+   if ((dev->data->dev_conf.rxmode.offloads &
+   RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY) == 0) {
+   RTE_ETHDEV_LOG(ERR,
+   "The port (ID=%"PRIu16") is not configured for IP 
reassembly\n",
+   port_id);
+   return -EINVAL;
+   }
+
+   if (conf == NULL) {
+   RTE_ETHDEV_LOG(ERR, "Cannot get reassembly info to NULL");
+   return -EINVAL;
+   }
+
+   RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->ip_reassembly_conf_get,
+   -ENOTSUP);
+   memset(conf, 0, sizeof(struct rte_eth_ip_reass_params));
+   return eth_err(port_id,
+  (*dev->dev_ops->ip_reassembly_conf_get)(dev, conf));
+}
+
+int
+rte_eth_ip_reassembly_conf_set(uint16_t port_id,
+  struct rte_eth_ip_reass_params *conf)
+{
+   struct rte_eth_dev *dev;
+
+   RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+   dev = &rte_eth_devices[port_id];
+
+   if (dev->data->dev_configured == 0) {
+   RTE_ETHDEV_LOG(ERR,
+   

[PATCH v3 3/4] ethdev: add mbuf dynfield for incomplete IP reassembly

2022-01-30 Thread Akhil Goyal
Hardware IP reassembly may be incomplete for multiple reasons like
reassembly timeout reached, duplicate fragments, etc.
To save application cycles to process these packets again, a new
mbuf dynflag is added to show that the mbuf received is not
reassembled properly.

Now if this dynflag is set, application can retrieve corresponding
chain of mbufs using mbuf dynfield set by the PMD. Now, it will be
up to application to either drop those fragments or wait for more time.

Signed-off-by: Akhil Goyal 
---
 lib/ethdev/ethdev_driver.h |  8 
 lib/ethdev/rte_ethdev.c| 28 
 lib/ethdev/rte_ethdev.h| 21 +
 lib/ethdev/version.map |  1 +
 4 files changed, 58 insertions(+)

diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index a310001648..7499a4fbf5 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -1689,6 +1689,14 @@ int
 rte_eth_hairpin_queue_peer_unbind(uint16_t cur_port, uint16_t cur_queue,
  uint32_t direction);
 
+/**
+ * @internal
+ * Register mbuf dynamic field and flag for IP reassembly incomplete case.
+ */
+__rte_internal
+int
+rte_eth_ip_reass_dynfield_register(int *field_offset, int *flag);
+
 
 /*
  * Legacy ethdev API used internally by drivers.
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 6e9a8cf33b..3c68a951c0 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -6553,6 +6553,34 @@ rte_eth_ip_reassembly_conf_set(uint16_t port_id,
   (*dev->dev_ops->ip_reassembly_conf_set)(dev, conf));
 }
 
+int
+rte_eth_ip_reass_dynfield_register(int *field_offset, int *flag_offset)
+{
+   static const struct rte_mbuf_dynfield field_desc = {
+   .name = RTE_ETH_IP_REASS_DYNFIELD_NAME,
+   .size = sizeof(rte_eth_ip_reass_dynfield_t),
+   .align = __alignof__(rte_eth_ip_reass_dynfield_t),
+   };
+   static const struct rte_mbuf_dynflag ip_reass_dynflag = {
+   .name = RTE_ETH_IP_REASS_INCOMPLETE_DYNFLAG_NAME,
+   };
+   int offset;
+
+   offset = rte_mbuf_dynfield_register(&field_desc);
+   if (offset < 0)
+   return -1;
+   if (field_offset != NULL)
+   *field_offset = offset;
+
+   offset = rte_mbuf_dynflag_register(&ip_reass_dynflag);
+   if (offset < 0)
+   return -1;
+   if (flag_offset != NULL)
+   *flag_offset = offset;
+
+   return 0;
+}
+
 RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
 
 RTE_INIT(ethdev_init_telemetry)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index e3532591f4..e3e6368a1d 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -5264,6 +5264,27 @@ __rte_experimental
 int rte_eth_ip_reassembly_conf_set(uint16_t port_id,
   struct rte_eth_ip_reass_params *conf);
 
+#define RTE_ETH_IP_REASS_DYNFIELD_NAME "rte_eth_ip_reass_dynfield"
+#define RTE_ETH_IP_REASS_INCOMPLETE_DYNFLAG_NAME 
"rte_eth_ip_reass_incomplete_dynflag"
+
+/**
+ * In case of IP reassembly offload failure, ol_flags in mbuf will be set
+ * with RTE_MBUF_F_RX_IPREASSEMBLY_INCOMPLETE and packets will be returned
+ * without alteration. The application can retrieve the attached fragments
+ * using mbuf dynamic field.
+ */
+typedef struct {
+   /**
+* Next fragment packet. Application should fetch dynamic field of
+* each fragment until a NULL is received and nb_frags is 0.
+*/
+   struct rte_mbuf *next_frag;
+   /** Time spent(in ms) by HW in waiting for further fragments. */
+   uint16_t time_spent_ms;
+   /** Number of more fragments attached in mbuf dynamic fields. */
+   uint16_t nb_frags;
+} rte_eth_ip_reass_dynfield_t;
+
 
 #include 
 
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index ad829dd47e..8b7578471a 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -283,6 +283,7 @@ INTERNAL {
rte_eth_hairpin_queue_peer_bind;
rte_eth_hairpin_queue_peer_unbind;
rte_eth_hairpin_queue_peer_update;
+   rte_eth_ip_reass_dynfield_register;
rte_eth_representor_id_get;
rte_eth_switch_domain_alloc;
rte_eth_switch_domain_free;
-- 
2.25.1



[PATCH v3 4/4] security: add IPsec option for IP reassembly

2022-01-30 Thread Akhil Goyal
A new option is added in IPsec to enable and attempt reassembly
of inbound packets.

Signed-off-by: Akhil Goyal 
---
 devtools/libabigail.abignore | 14 ++
 lib/security/rte_security.h  | 12 +++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/devtools/libabigail.abignore b/devtools/libabigail.abignore
index 90f449c43a..c6e304282f 100644
--- a/devtools/libabigail.abignore
+++ b/devtools/libabigail.abignore
@@ -16,3 +16,17 @@
 [suppress_type]
name = rte_eth_dev_info
has_data_member_inserted_between = {offset_of(reserved_64s), end}
+
+; Ignore fields inserted in place of reserved_opts of 
rte_security_ipsec_sa_options
+[suppress_type]
+   name = rte_ipsec_sa_prm
+   name = rte_security_ipsec_sa_options
+   has_data_member_inserted_between = {offset_of(reserved_opts), end}
+
+[suppress_type]
+   name = rte_security_capability
+   has_data_member_inserted_between = {offset_of(reserved_opts), 
(offset_of(reserved_opts) + 18)}
+
+[suppress_type]
+   name = rte_security_session_conf
+   has_data_member_inserted_between = {offset_of(reserved_opts), 
(offset_of(reserved_opts) + 18)}
diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
index 1228b6c8b1..168b837a82 100644
--- a/lib/security/rte_security.h
+++ b/lib/security/rte_security.h
@@ -264,6 +264,16 @@ struct rte_security_ipsec_sa_options {
 */
uint32_t l4_csum_enable : 1;
 
+   /** Enable reassembly on incoming packets.
+*
+* * 1: Enable driver to try reassembly of encrypted IP packets for
+*  this SA, if supported by the driver. This feature will work
+*  only if rx_offload RTE_ETH_RX_OFFLOAD_IP_REASSEMBLY is set in
+*  inline Ethernet device.
+* * 0: Disable reassembly of packets (default).
+*/
+   uint32_t reass_en : 1;
+
/** Reserved bit fields for future extension
 *
 * User should ensure reserved_opts is cleared as it may change in
@@ -271,7 +281,7 @@ struct rte_security_ipsec_sa_options {
 *
 * Note: Reduce number of bits in reserved_opts for every new option.
 */
-   uint32_t reserved_opts : 18;
+   uint32_t reserved_opts : 17;
 };
 
 /** IPSec security association direction */
-- 
2.25.1



[PATCH v5] Add pragma to ignore gcc-compat warnings in clang when used with diagnose_if.

2022-01-30 Thread Michael Barker
When compiling with clang using -Wpedantic (or -Wgcc-compat) the use of
diagnose_if kicks up a warning:

.../include/rte_interrupts.h:623:1: error: 'diagnose_if' is a clang
extension [-Werror,-Wgcc-compat]
__rte_internal
^
.../include/rte_compat.h:36:16: note: expanded from macro '__rte_internal'
__attribute__((diagnose_if(1, "Symbol is not public ABI", "error"), \

This change ignores the '-Wgcc-compat' warning in the specific location
where the warning occurs.  It is safe to do in this circumstance as the
specific macro is only defined when using the clang compiler.

Signed-off-by: Michael Barker 
---
 lib/eal/include/rte_compat.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/eal/include/rte_compat.h b/lib/eal/include/rte_compat.h
index 2718612cce..9556bbf4d0 100644
--- a/lib/eal/include/rte_compat.h
+++ b/lib/eal/include/rte_compat.h
@@ -33,8 +33,11 @@ section(".text.internal")))
 #elif !defined ALLOW_INTERNAL_API && __has_attribute(diagnose_if) /* For clang 
*/
 
 #define __rte_internal \
+_Pragma("GCC diagnostic push") \
+_Pragma("GCC diagnostic ignored \"-Wgcc-compat\"") \
 __attribute__((diagnose_if(1, "Symbol is not public ABI", "error"), \
-section(".text.internal")))
+section(".text.internal"))) \
+_Pragma("GCC diagnostic pop")
 
 #else
 
-- 
2.25.1



Re: [PATCH v4] Add pragma to ignore gcc-compat warnings in clang when used with diagnose_if.

2022-01-30 Thread Michael Barker
> > extension [-Werror,-Wgcc-compat]
> > __rte_internal
> > ^
>
> Which clang version is this?
>

Clang 10, 11, 12 and 13.


> Perhaps the allow internal API could use a different attribute that
> could work in both cases?
>

I've realised I've made a small error. It is not -Wall that includes this
particular check, but -Wpendantic.  I'll update the commit message.

However I can't find an attribute that is cross platform.  There is the
#error directive, but it does not work as an attribute.  I've also tried
adding `#if __clang__` around the macro as well, but the warning still gets
triggered by clang.

If there are any other options that I've missed, let me know and I can try
them out.

Mike.


Re: [PATCH v4] Add pragma to ignore gcc-compat warnings in clang when used with diagnose_if.

2022-01-30 Thread Michael Barker
>
> > +++ b/lib/eal/include/rte_compat.h
> > @@ -33,8 +33,11 @@ section(".text.internal")))
> >  #elif !defined ALLOW_INTERNAL_API && __has_attribute(diagnose_if) /*
> > For clang */
>
> Why doesn't the __has_attribute take care of this?
> I would have thought that gcc would check the for the attribute, find it
> doesn't support it and ignore the whole thing?
>

It appears that the '-Wgcc-compat' check is too naive and doesn't pick up
the `__has_attribute` or `#if __clang__` and realise that there isn't
really a compatibility issue with the code.

Mike.


RE: [dpdk-dev][dpdk-users] A problem about memory may not be all-zero allocated by rte_zmalloc_socket()

2022-01-30 Thread Honnappa Nagarahalli
Hi Yunjian,
That's interesting. Is it possible to elaborate the use case or 
possibly provide the code snippet?

It is possible that it is a synchronization problem due to relaxed memory model 
that Arm architecture uses. There could be a barrier missing in the code.

Thanks,
Honnappa

From: wangyunjian 
Sent: Saturday, January 29, 2022 9:21 PM
To: dev@dpdk.org; us...@dpdk.org
Cc: Feifei Wang ; Ruifeng Wang ; 
Huangshaozhang ; dingxiaoxiong 

Subject: [dpdk-dev][dpdk-users] A problem about memory may not be all-zero 
allocated by rte_zmalloc_socket()

Hi, all

There's a problem that the memory are allocated by rte_zmalloc_socket()
may not be all-zero on the ARM platform.

However, the x86 platform does not have this problem.


Any ideas ?


Thanks,

Yunjian



[PATCH v1 1/2] net/axgbe: add support for Yellow Carp ethernet device

2022-01-30 Thread ssebasti
From: Selwin Sebastian 

Yellow Carp ethernet devices (V3xxx) use the existing PCI ID but
the window settings for the indirect PCS access have been
altered. Add the check for Yellow Carp Ethernet devices to
use the new register values.

Signed-off-by: Selwin Sebastian 
---
 drivers/net/axgbe/axgbe_common.h |  2 ++
 drivers/net/axgbe/axgbe_ethdev.c | 34 +---
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 5310ac54f5..b9ebf64fb8 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -901,6 +901,8 @@
 #define PCS_V2_WINDOW_SELECT   0x9064
 #define PCS_V2_RV_WINDOW_DEF   0x1060
 #define PCS_V2_RV_WINDOW_SELECT0x1064
+#define PCS_V2_YC_WINDOW_DEF   0x18060
+#define PCS_V2_YC_WINDOW_SELECT0x18064
 
 /* PCS register entry bit positions and sizes */
 #define PCS_V2_WINDOW_DEF_OFFSET_INDEX 6
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index e9546469f3..2be9387f98 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -173,6 +173,8 @@ static const struct axgbe_xstats axgbe_xstats_strings[] = {
 /* The set of PCI devices this driver supports */
 #define AMD_PCI_VENDOR_ID   0x1022
 #define AMD_PCI_RV_ROOT_COMPLEX_ID 0x15d0
+#define AMD_PCI_YC_ROOT_COMPLEX_ID 0x14b5
+#define AMD_PCI_SNOWY_ROOT_COMPLEX_ID  0x1450
 #define AMD_PCI_AXGBE_DEVICE_V2A 0x1458
 #define AMD_PCI_AXGBE_DEVICE_V2B 0x1459
 
@@ -2178,17 +2180,6 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
pdata->pci_dev = pci_dev;
 
-   /*
-* Use root complex device ID to differentiate RV AXGBE vs SNOWY AXGBE
-*/
-   if ((get_pci_rc_devid()) == AMD_PCI_RV_ROOT_COMPLEX_ID) {
-   pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF;
-   pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT;
-   } else {
-   pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF;
-   pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT;
-   }
-
pdata->xgmac_regs =
(void *)pci_dev->mem_resource[AXGBE_AXGMAC_BAR].addr;
pdata->xprop_regs = (void *)((uint8_t *)pdata->xgmac_regs
@@ -2203,6 +2194,27 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
else
pdata->vdata = &axgbe_v2b;
 
+   /*
+* Use PCI root complex device ID to identify the CPU
+*/
+   switch (get_pci_rc_devid()) {
+   case AMD_PCI_RV_ROOT_COMPLEX_ID:
+   pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF;
+   pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT;
+   break;
+   case AMD_PCI_YC_ROOT_COMPLEX_ID:
+   pdata->xpcs_window_def_reg = PCS_V2_YC_WINDOW_DEF;
+   pdata->xpcs_window_sel_reg = PCS_V2_YC_WINDOW_SELECT;
+   break;
+   case AMD_PCI_SNOWY_ROOT_COMPLEX_ID:
+   pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF;
+   pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT;
+   break;
+   default:
+   PMD_DRV_LOG(ERR, "No supported devices found\n");
+   return -ENODEV;
+   }
+
/* Configure the PCS indirect addressing support */
reg = XPCS32_IOREAD(pdata, pdata->xpcs_window_def_reg);
pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET);
-- 
2.25.1



[PATCH v1 0/2] net/axgbe: Add support for Yellow Carp ethernet

2022-01-30 Thread ssebasti
From: Selwin Sebastian 

Adding support for Yellow Carp ethernet device

Selwin Sebastian (2):
  net/axgbe: add support for Yellow Carp ethernet device
  net/axgbe: disable the CDR wa for Yellow Carp devices

 drivers/net/axgbe/axgbe_common.h |  2 ++
 drivers/net/axgbe/axgbe_ethdev.c | 36 ++--
 2 files changed, 27 insertions(+), 11 deletions(-)

-- 
2.25.1



[PATCH v1 2/2] net/axgbe: disable the CDR wa for Yellow Carp devices

2022-01-30 Thread ssebasti
From: Selwin Sebastian 

Yellow Carp ethernet devices (V3xxx) do not require
autonegotiation CDR workaround, hence disable the same.

Signed-off-by: Selwin Sebastian 
---
 drivers/net/axgbe/axgbe_ethdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index 2be9387f98..951da5cc26 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -2205,6 +2205,8 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
case AMD_PCI_YC_ROOT_COMPLEX_ID:
pdata->xpcs_window_def_reg = PCS_V2_YC_WINDOW_DEF;
pdata->xpcs_window_sel_reg = PCS_V2_YC_WINDOW_SELECT;
+   /* Yellow Carp devices do not need cdr workaround */
+   pdata->vdata->an_cdr_workaround = 0;
break;
case AMD_PCI_SNOWY_ROOT_COMPLEX_ID:
pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF;
-- 
2.25.1



RE: [PATCH v1 1/2] net/axgbe: add support for Yellow Carp ethernet device

2022-01-30 Thread Namburu, Chandu-babu
[Public]



-Original Message-
From: Sebastian, Selwin  
Sent: Monday, January 31, 2022 11:09 AM
To: dev@dpdk.org
Cc: Namburu, Chandu-babu ; ferruh.yi...@intel.com
Subject: [PATCH v1 1/2] net/axgbe: add support for Yellow Carp ethernet device

From: Selwin Sebastian 

Yellow Carp ethernet devices (V3xxx) use the existing PCI ID but the window 
settings for the indirect PCS access have been altered. Add the check for 
Yellow Carp Ethernet devices to use the new register values.

Signed-off-by: Selwin Sebastian 
---
 drivers/net/axgbe/axgbe_common.h |  2 ++  drivers/net/axgbe/axgbe_ethdev.c | 
34 +---
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index 5310ac54f5..b9ebf64fb8 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -901,6 +901,8 @@
 #define PCS_V2_WINDOW_SELECT   0x9064
 #define PCS_V2_RV_WINDOW_DEF   0x1060
 #define PCS_V2_RV_WINDOW_SELECT0x1064
+#define PCS_V2_YC_WINDOW_DEF   0x18060
+#define PCS_V2_YC_WINDOW_SELECT0x18064
 
 /* PCS register entry bit positions and sizes */
 #define PCS_V2_WINDOW_DEF_OFFSET_INDEX 6
diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c
index e9546469f3..2be9387f98 100644
--- a/drivers/net/axgbe/axgbe_ethdev.c
+++ b/drivers/net/axgbe/axgbe_ethdev.c
@@ -173,6 +173,8 @@ static const struct axgbe_xstats axgbe_xstats_strings[] = {
 /* The set of PCI devices this driver supports */
 #define AMD_PCI_VENDOR_ID   0x1022
 #define AMD_PCI_RV_ROOT_COMPLEX_ID 0x15d0
+#define AMD_PCI_YC_ROOT_COMPLEX_ID 0x14b5
+#define AMD_PCI_SNOWY_ROOT_COMPLEX_ID  0x1450
 #define AMD_PCI_AXGBE_DEVICE_V2A 0x1458  #define AMD_PCI_AXGBE_DEVICE_V2B 
0x1459
 
@@ -2178,17 +2180,6 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
pdata->pci_dev = pci_dev;
 
-   /*
-* Use root complex device ID to differentiate RV AXGBE vs SNOWY AXGBE
-*/
-   if ((get_pci_rc_devid()) == AMD_PCI_RV_ROOT_COMPLEX_ID) {
-   pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF;
-   pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT;
-   } else {
-   pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF;
-   pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT;
-   }
-
pdata->xgmac_regs =
(void *)pci_dev->mem_resource[AXGBE_AXGMAC_BAR].addr;
pdata->xprop_regs = (void *)((uint8_t *)pdata->xgmac_regs @@ -2203,6 
+2194,27 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev)
else
pdata->vdata = &axgbe_v2b;
 
+   /*
+* Use PCI root complex device ID to identify the CPU
+*/
+   switch (get_pci_rc_devid()) {
+   case AMD_PCI_RV_ROOT_COMPLEX_ID:
+   pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF;
+   pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT;
+   break;
+   case AMD_PCI_YC_ROOT_COMPLEX_ID:
+   pdata->xpcs_window_def_reg = PCS_V2_YC_WINDOW_DEF;
+   pdata->xpcs_window_sel_reg = PCS_V2_YC_WINDOW_SELECT;
+   break;
+   case AMD_PCI_SNOWY_ROOT_COMPLEX_ID:
+   pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF;
+   pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT;
+   break;
+   default:
+   PMD_DRV_LOG(ERR, "No supported devices found\n");
+   return -ENODEV;
+   }
+
/* Configure the PCS indirect addressing support */
reg = XPCS32_IOREAD(pdata, pdata->xpcs_window_def_reg);
pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET);
--
2.25.1

For series,
Acked-by: Chandubabu Namburu