> -----Original Message-----
> From: Hu, Jiayu
> Sent: Tuesday, September 26, 2017 2:27 PM
> To: dev@dpdk.org
> Cc: Yigit, Ferruh <ferruh.yi...@intel.com>; Tan, Jianfeng
> <jianfeng....@intel.com>; Ananyev, Konstantin
> <konstantin.anan...@intel.com>; tho...@monjalon.net; Wu, Jingjing
> <jingjing...@intel.com>; Yao, Lei A <lei.a....@intel.com>; Hu, Jiayu
> <jiayu...@intel.com>
> Subject: [PATCH v4] app/testpmd: enable the heavyweight mode TCP/IPv4
> GRO
>
> The GRO library provides two modes to reassemble packets. Currently, the
> csum forwarding engine supports using the lightweight mode to reassemble
> TCP/IPv4 packets. This patch introduces the heavyweight mode for TCP/IPv4
> GRO in the csum forwarding engine.
>
> With the command "set port <port_id> gro on|off", users can enable
> TCP/IPv4 GRO for a given port. With the command "set gro flush <cycles>",
> users can determine when the GROed TCP/IPv4 packets are flushed from
> the reassembly tables. With the command "show port <port_id> gro",
> users can display the GRO configuration.
>
> The GRO library doesn't re-calculate checksums for merged packets. If
> users want the merged packets to have correct IP and TCP checksums,
> please select HW IP checksum calculation and HW TCP checksum calculation
> for the port to which the merged packets are transmitted.
>
> Signed-off-by: Jiayu Hu <jiayu...@intel.com>
> Reviewed-by: Ferruh Yigit <ferruh.yi...@intel.com>

Tested-by: Yao Lei <lei.a....@intel.com>
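For anyone who wants to try the patch, the new commands can be exercised in the csum engine roughly as follows (port 0 as the GRO-enabled RX port, port 1 as the TX port and the flush value of 2 are only example values; depending on the setup, the ports may need to be stopped before changing the csum offloads):

    testpmd> set fwd csum
    testpmd> csum set ip hw 1
    testpmd> csum set tcp hw 1
    testpmd> set port 0 gro on
    testpmd> set gro flush 2
    testpmd> show port 0 gro
    testpmd> start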
This patch has been tested on my bench. The following is the performance
data obtained from an iperf test with a single flow:

No GRO:                       9.5 Gbps
Kernel GRO:                  13.6 Gbps
DPDK GRO with flush cycle=1: 25.9 Gbps
DPDK GRO with flush cycle=2: 27.9 Gbps

Note: When using DPDK GRO with flush cycle=2, I set the vhost
rx_queue_size to 1024; with the default value of 256, I sometimes hit a
data stall.

OS: Ubuntu 16.04
CPU: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz

> ---
> changes in v4:
> - fix unchecking the min value of 'cycle' bug in setup_gro_flush_cycles
> - update the context of the testpmd document and commit logs
> changes in v3:
> - remove "heavyweight mode" and "lightweight mode" from GRO commands
> - combine two patches into one
> - use consistent help string for GRO commands
> - remove the unnecessary command "gro set (max_flow_num) (max_item_num_per_flow) (port_id)"
> changes in v2:
> - use "set" and "show" as the root level command
> - add a new command to show GRO configuration
> - fix l2_len/l3_len/l4_len unset etc. bugs
>
> app/test-pmd/cmdline.c | 206 ++++++++++++++++------------
> app/test-pmd/config.c | 68 +++++++--
> app/test-pmd/csumonly.c | 31 ++++-
> app/test-pmd/testpmd.c | 19 ++-
> app/test-pmd/testpmd.h | 16 ++-
> doc/guides/testpmd_app_ug/testpmd_funcs.rst | 50 +++++--
> 6 files changed, 270 insertions(+), 120 deletions(-)
>
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c > index ccdf239..e44c02e 100644 > --- a/app/test-pmd/cmdline.c > +++ b/app/test-pmd/cmdline.c > @@ -423,13 +423,16 @@ static void cmd_help_long_parsed(void > *parsed_result, > "tso show (portid)" > " Display the status of TCP Segmentation > Offload.\n\n" > > - "gro (on|off) (port_id)" > + "set port (port_id) gro on|off\n" > " Enable or disable Generic Receive Offload in" > " csum forwarding engine.\n\n" > > - "gro set (max_flow_num) > (max_item_num_per_flow) (port_id)\n" > - " Set max flow number and max packet number > per-flow" > - " for GRO.\n\n" > + "show port (port_id) gro\n" > + " Display GRO configuration.\n\n" > + > + "set gro flush (cycles)\n" > + " Set the cycle to flush GROed packets from" > + " reassembly tables.\n\n" > > "set fwd (%s)\n" > " Set packet forwarding mode.\n\n" > @@ -3854,115 +3857,145 @@ cmdline_parse_inst_t cmd_tunnel_tso_show > = { > }; > > /* *** SET GRO FOR A PORT *** */ > -struct cmd_gro_result { > +struct cmd_gro_enable_result { > + cmdline_fixed_string_t cmd_set; > + cmdline_fixed_string_t cmd_port; > cmdline_fixed_string_t cmd_keyword; > - cmdline_fixed_string_t mode; > - uint8_t port_id; > + cmdline_fixed_string_t cmd_onoff; > + uint8_t cmd_pid; > }; > > static void > -cmd_enable_gro_parsed(void *parsed_result, > +cmd_gro_enable_parsed(void *parsed_result, > __attribute__((unused)) struct cmdline *cl, > __attribute__((unused)) void *data) > { > - struct cmd_gro_result *res; > + struct cmd_gro_enable_result *res; > > res = parsed_result; > - setup_gro(res->mode, res->port_id); > -} > - > -cmdline_parse_token_string_t cmd_gro_keyword = > - TOKEN_STRING_INITIALIZER(struct cmd_gro_result, > + if (!strcmp(res->cmd_keyword, "gro")) > + setup_gro(res->cmd_onoff, res->cmd_pid); > +} > + > +cmdline_parse_token_string_t cmd_gro_enable_set = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_enable_result, > + cmd_set, "set"); > +cmdline_parse_token_string_t cmd_gro_enable_port = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_enable_result, > + cmd_keyword, "port"); > +cmdline_parse_token_num_t cmd_gro_enable_pid = > + TOKEN_NUM_INITIALIZER(struct cmd_gro_enable_result, > + cmd_pid, 
UINT8); > +cmdline_parse_token_string_t cmd_gro_enable_keyword = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_enable_result, > cmd_keyword, "gro"); > -cmdline_parse_token_string_t cmd_gro_mode = > - TOKEN_STRING_INITIALIZER(struct cmd_gro_result, > - mode, "on#off"); > -cmdline_parse_token_num_t cmd_gro_pid = > - TOKEN_NUM_INITIALIZER(struct cmd_gro_result, > - port_id, UINT8); > +cmdline_parse_token_string_t cmd_gro_enable_onoff = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_enable_result, > + cmd_onoff, "on#off"); > > -cmdline_parse_inst_t cmd_enable_gro = { > - .f = cmd_enable_gro_parsed, > +cmdline_parse_inst_t cmd_gro_enable = { > + .f = cmd_gro_enable_parsed, > .data = NULL, > - .help_str = "gro (on|off) (port_id)", > + .help_str = "set port <port_id> gro on|off", > .tokens = { > - (void *)&cmd_gro_keyword, > - (void *)&cmd_gro_mode, > - (void *)&cmd_gro_pid, > + (void *)&cmd_gro_enable_set, > + (void *)&cmd_gro_enable_port, > + (void *)&cmd_gro_enable_pid, > + (void *)&cmd_gro_enable_keyword, > + (void *)&cmd_gro_enable_onoff, > NULL, > }, > }; > > -/* *** SET MAX FLOW NUMBER AND ITEM NUM PER FLOW FOR GRO *** */ > -struct cmd_gro_set_result { > - cmdline_fixed_string_t gro; > - cmdline_fixed_string_t mode; > - uint16_t flow_num; > - uint16_t item_num_per_flow; > - uint8_t port_id; > +/* *** DISPLAY GRO CONFIGURATION *** */ > +struct cmd_gro_show_result { > + cmdline_fixed_string_t cmd_show; > + cmdline_fixed_string_t cmd_port; > + cmdline_fixed_string_t cmd_keyword; > + uint8_t cmd_pid; > }; > > static void > -cmd_gro_set_parsed(void *parsed_result, > - __attribute__((unused)) struct cmdline *cl, > - __attribute__((unused)) void *data) > +cmd_gro_show_parsed(void *parsed_result, > + __attribute__((unused)) struct cmdline *cl, > + __attribute__((unused)) void *data) > { > - struct cmd_gro_set_result *res = parsed_result; > + struct cmd_gro_show_result *res; > > - if (port_id_is_invalid(res->port_id, ENABLED_WARN)) > - return; > - if (test_done == 0) { > - printf("Before set GRO flow_num and item_num_per_flow," > - " please stop forwarding first\n"); > - return; > - } > + res = parsed_result; > + if (!strcmp(res->cmd_keyword, "gro")) > + show_gro(res->cmd_pid); > +} > + > +cmdline_parse_token_string_t cmd_gro_show_show = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_show_result, > + cmd_show, "show"); > +cmdline_parse_token_string_t cmd_gro_show_port = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_show_result, > + cmd_port, "port"); > +cmdline_parse_token_num_t cmd_gro_show_pid = > + TOKEN_NUM_INITIALIZER(struct cmd_gro_show_result, > + cmd_pid, UINT8); > +cmdline_parse_token_string_t cmd_gro_show_keyword = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_show_result, > + cmd_keyword, "gro"); > > - if (!strcmp(res->mode, "set")) { > - if (res->flow_num == 0) > - printf("Invalid flow number. Revert to default value:" > - " %u.\n", > GRO_DEFAULT_FLOW_NUM); > - else > - gro_ports[res->port_id].param.max_flow_num = > - res->flow_num; > +cmdline_parse_inst_t cmd_gro_show = { > + .f = cmd_gro_show_parsed, > + .data = NULL, > + .help_str = "show port <port_id> gro", > + .tokens = { > + (void *)&cmd_gro_show_show, > + (void *)&cmd_gro_show_port, > + (void *)&cmd_gro_show_pid, > + (void *)&cmd_gro_show_keyword, > + NULL, > + }, > +}; > > - if (res->item_num_per_flow == 0) > - printf("Invalid item number per-flow. 
Revert" > - " to default value:%u.\n", > - > GRO_DEFAULT_ITEM_NUM_PER_FLOW); > - else > - gro_ports[res->port_id].param.max_item_per_flow > = > - res->item_num_per_flow; > - } > +/* *** SET FLUSH CYCLES FOR GRO *** */ > +struct cmd_gro_flush_result { > + cmdline_fixed_string_t cmd_set; > + cmdline_fixed_string_t cmd_keyword; > + cmdline_fixed_string_t cmd_flush; > + uint8_t cmd_cycles; > +}; > + > +static void > +cmd_gro_flush_parsed(void *parsed_result, > + __attribute__((unused)) struct cmdline *cl, > + __attribute__((unused)) void *data) > +{ > + struct cmd_gro_flush_result *res; > + > + res = parsed_result; > + if ((!strcmp(res->cmd_keyword, "gro")) && > + (!strcmp(res->cmd_flush, "flush"))) > + setup_gro_flush_cycles(res->cmd_cycles); > } > > -cmdline_parse_token_string_t cmd_gro_set_gro = > - TOKEN_STRING_INITIALIZER(struct cmd_gro_set_result, > - gro, "gro"); > -cmdline_parse_token_string_t cmd_gro_set_mode = > - TOKEN_STRING_INITIALIZER(struct cmd_gro_set_result, > - mode, "set"); > -cmdline_parse_token_num_t cmd_gro_set_flow_num = > - TOKEN_NUM_INITIALIZER(struct cmd_gro_set_result, > - flow_num, UINT16); > -cmdline_parse_token_num_t cmd_gro_set_item_num_per_flow = > - TOKEN_NUM_INITIALIZER(struct cmd_gro_set_result, > - item_num_per_flow, UINT16); > -cmdline_parse_token_num_t cmd_gro_set_portid = > - TOKEN_NUM_INITIALIZER(struct cmd_gro_set_result, > - port_id, UINT8); > +cmdline_parse_token_string_t cmd_gro_flush_set = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_flush_result, > + cmd_set, "set"); > +cmdline_parse_token_string_t cmd_gro_flush_keyword = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_flush_result, > + cmd_keyword, "gro"); > +cmdline_parse_token_string_t cmd_gro_flush_flush = > + TOKEN_STRING_INITIALIZER(struct cmd_gro_flush_result, > + cmd_flush, "flush"); > +cmdline_parse_token_num_t cmd_gro_flush_cycles = > + TOKEN_NUM_INITIALIZER(struct cmd_gro_flush_result, > + cmd_cycles, UINT8); > > -cmdline_parse_inst_t cmd_gro_set = { > - .f = cmd_gro_set_parsed, > +cmdline_parse_inst_t cmd_gro_flush = { > + .f = cmd_gro_flush_parsed, > .data = NULL, > - .help_str = "gro set <max_flow_num> <max_item_num_per_flow> > " > - "<port_id>: set max flow number and max packet number > per-flow " > - "for GRO", > + .help_str = "set gro flush <cycles>", > .tokens = { > - (void *)&cmd_gro_set_gro, > - (void *)&cmd_gro_set_mode, > - (void *)&cmd_gro_set_flow_num, > - (void *)&cmd_gro_set_item_num_per_flow, > - (void *)&cmd_gro_set_portid, > + (void *)&cmd_gro_flush_set, > + (void *)&cmd_gro_flush_keyword, > + (void *)&cmd_gro_flush_flush, > + (void *)&cmd_gro_flush_cycles, > NULL, > }, > }; > @@ -14253,8 +14286,9 @@ cmdline_parse_ctx_t main_ctx[] = { > (cmdline_parse_inst_t *)&cmd_tso_show, > (cmdline_parse_inst_t *)&cmd_tunnel_tso_set, > (cmdline_parse_inst_t *)&cmd_tunnel_tso_show, > - (cmdline_parse_inst_t *)&cmd_enable_gro, > - (cmdline_parse_inst_t *)&cmd_gro_set, > + (cmdline_parse_inst_t *)&cmd_gro_enable, > + (cmdline_parse_inst_t *)&cmd_gro_flush, > + (cmdline_parse_inst_t *)&cmd_gro_show, > (cmdline_parse_inst_t *)&cmd_link_flow_control_set, > (cmdline_parse_inst_t *)&cmd_link_flow_control_set_rx, > (cmdline_parse_inst_t *)&cmd_link_flow_control_set_tx, > diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c > index 3ae3e1c..92220b1 100644 > --- a/app/test-pmd/config.c > +++ b/app/test-pmd/config.c > @@ -2420,7 +2420,7 @@ set_tx_pkt_segments(unsigned *seg_lengths, > unsigned nb_segs) > } > > void > -setup_gro(const char *mode, uint8_t port_id) > +setup_gro(const char *onoff, 
uint8_t port_id) > { > if (!rte_eth_dev_is_valid_port(port_id)) { > printf("invalid port id %u\n", port_id); > @@ -2431,29 +2431,77 @@ setup_gro(const char *mode, uint8_t port_id) > " please stop forwarding first\n"); > return; > } > - if (strcmp(mode, "on") == 0) { > - if (gro_ports[port_id].enable) { > - printf("port %u has enabled GRO\n", port_id); > + if (strcmp(onoff, "on") == 0) { > + if (gro_ports[port_id].enable != 0) { > + printf("Port %u has enabled GRO. Please" > + " disable GRO first\n", port_id); > return; > } > - gro_ports[port_id].enable = 1; > - gro_ports[port_id].param.gro_types = RTE_GRO_TCP_IPV4; > - > - if (gro_ports[port_id].param.max_flow_num == 0) > + if (gro_flush_cycles == GRO_DEFAULT_FLUSH_CYCLES) { > + gro_ports[port_id].param.gro_types = > RTE_GRO_TCP_IPV4; > gro_ports[port_id].param.max_flow_num = > GRO_DEFAULT_FLOW_NUM; > - if (gro_ports[port_id].param.max_item_per_flow == 0) > gro_ports[port_id].param.max_item_per_flow = > GRO_DEFAULT_ITEM_NUM_PER_FLOW; > + } > + gro_ports[port_id].enable = 1; > } else { > if (gro_ports[port_id].enable == 0) { > - printf("port %u has disabled GRO\n", port_id); > + printf("Port %u has disabled GRO\n", port_id); > return; > } > gro_ports[port_id].enable = 0; > } > } > > +void > +setup_gro_flush_cycles(uint8_t cycles) > +{ > + if (test_done == 0) { > + printf("Before change flush interval for GRO," > + " please stop forwarding first.\n"); > + return; > + } > + > + if (cycles > GRO_MAX_FLUSH_CYCLES || cycles < > + GRO_DEFAULT_FLUSH_CYCLES) { > + printf("The flushing cycle be in the range" > + " of 1 to %u. Revert to the default" > + " value %u.\n", > + GRO_MAX_FLUSH_CYCLES, > + GRO_DEFAULT_FLUSH_CYCLES); > + cycles = GRO_DEFAULT_FLUSH_CYCLES; > + } > + > + gro_flush_cycles = cycles; > +} > + > +void > +show_gro(uint8_t port_id) > +{ > + struct rte_gro_param *param; > + uint32_t max_pkts_num; > + > + param = &gro_ports[port_id].param; > + > + if (!rte_eth_dev_is_valid_port(port_id)) { > + printf("Invalid port id %u.\n", port_id); > + return; > + } > + if (gro_ports[port_id].enable) { > + printf("GRO type: TCP/IPv4\n"); > + if (gro_flush_cycles == GRO_DEFAULT_FLUSH_CYCLES) { > + max_pkts_num = param->max_flow_num * > + param->max_item_per_flow; > + } else > + max_pkts_num = MAX_PKT_BURST * > GRO_MAX_FLUSH_CYCLES; > + printf("Max number of packets to perform GRO: %u\n", > + max_pkts_num); > + printf("Flushing cycles: %u\n", gro_flush_cycles); > + } else > + printf("Port %u doesn't enable GRO.\n", port_id); > +} > + > char* > list_pkt_forwarding_modes(void) > { > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c > index 90c8119..ca50ab7 100644 > --- a/app/test-pmd/csumonly.c > +++ b/app/test-pmd/csumonly.c > @@ -631,6 +631,9 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) > struct rte_mbuf *m, *p; > struct ether_hdr *eth_hdr; > void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ > + void **gro_ctx; > + uint16_t gro_pkts_num; > + uint8_t gro_enable; > uint16_t nb_rx; > uint16_t nb_tx; > uint16_t nb_prep; > @@ -657,17 +660,13 @@ pkt_burst_checksum_forward(struct fwd_stream > *fs) > nb_pkt_per_burst); > if (unlikely(nb_rx == 0)) > return; > - if (unlikely(gro_ports[fs->rx_port].enable)) > - nb_rx = rte_gro_reassemble_burst(pkts_burst, > - nb_rx, > - &(gro_ports[fs->rx_port].param)); > - > #ifdef RTE_TEST_PMD_RECORD_BURST_STATS > fs->rx_burst_stats.pkt_burst_spread[nb_rx]++; > #endif > fs->rx_packets += nb_rx; > rx_bad_ip_csum = 0; > rx_bad_l4_csum = 0; > + gro_enable = gro_ports[fs->rx_port].enable; > 
> txp = &ports[fs->tx_port]; > testpmd_ol_flags = txp->tx_ol_flags; > @@ -851,6 +850,28 @@ pkt_burst_checksum_forward(struct fwd_stream > *fs) > } > } > > + if (unlikely(gro_enable)) { > + if (gro_flush_cycles == GRO_DEFAULT_FLUSH_CYCLES) { > + nb_rx = rte_gro_reassemble_burst(pkts_burst, > nb_rx, > + &(gro_ports[fs->rx_port].param)); > + } else { > + gro_ctx = current_fwd_lcore()->gro_ctx; > + nb_rx = rte_gro_reassemble(pkts_burst, nb_rx, > gro_ctx); > + > + if (++fs->gro_times >= gro_flush_cycles) { > + gro_pkts_num = > rte_gro_get_pkt_count(gro_ctx); > + if (gro_pkts_num > MAX_PKT_BURST - nb_rx) > + gro_pkts_num = MAX_PKT_BURST - > nb_rx; > + > + nb_rx += rte_gro_timeout_flush(gro_ctx, 0, > + RTE_GRO_TCP_IPV4, > + &pkts_burst[nb_rx], > + gro_pkts_num); > + fs->gro_times = 0; > + } > + } > + } > + > nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue, > pkts_burst, nb_rx); > if (nb_prep != nb_rx) > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c > index e097ee0..c9d988e 100644 > --- a/app/test-pmd/testpmd.c > +++ b/app/test-pmd/testpmd.c > @@ -90,7 +90,6 @@ > #ifdef RTE_LIBRTE_LATENCY_STATS > #include <rte_latencystats.h> > #endif > -#include <rte_gro.h> > > #include "testpmd.h" > > @@ -386,6 +385,7 @@ uint8_t bitrate_enabled; > #endif > > struct gro_status gro_ports[RTE_MAX_ETHPORTS]; > +uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES; > > /* Forward function declarations */ > static void map_port_queue_stats_mapping_registers(uint8_t pi, struct > rte_port *port); > @@ -570,6 +570,7 @@ init_config(void) > unsigned int nb_mbuf_per_pool; > lcoreid_t lc_id; > uint8_t port_per_socket[RTE_MAX_NUMA_NODES]; > + struct rte_gro_param gro_param; > > memset(port_per_socket,0,RTE_MAX_NUMA_NODES); > > @@ -671,6 +672,20 @@ init_config(void) > rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n"); > > fwd_config_setup(); > + > + /* create a gro context for each lcore */ > + gro_param.gro_types = RTE_GRO_TCP_IPV4; > + gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES; > + gro_param.max_item_per_flow = MAX_PKT_BURST; > + for (lc_id = 0; lc_id < nb_lcores; lc_id++) { > + gro_param.socket_id = rte_lcore_to_socket_id( > + fwd_lcores_cpuids[lc_id]); > + fwd_lcores[lc_id]->gro_ctx = > rte_gro_ctx_create(&gro_param); > + if (fwd_lcores[lc_id]->gro_ctx == NULL) { > + rte_exit(EXIT_FAILURE, > + "rte_gro_ctx_create() failed\n"); > + } > + } > } > > > @@ -1217,6 +1232,7 @@ stop_packet_forwarding(void) > #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > uint64_t fwd_cycles; > #endif > + > static const char *acc_stats_border = "+++++++++++++++"; > > if (test_done) { > @@ -1307,6 +1323,7 @@ stop_packet_forwarding(void) > > fwd_port_stats_display(pt_id, &stats); > } > + > printf("\n %s Accumulated forward statistics for all ports" > "%s\n", > acc_stats_border, acc_stats_border); > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h > index 1d1ee75..9433eae 100644 > --- a/app/test-pmd/testpmd.h > +++ b/app/test-pmd/testpmd.h > @@ -120,6 +120,7 @@ struct fwd_stream { > unsigned int fwd_dropped; /**< received packets not forwarded */ > unsigned int rx_bad_ip_csum ; /**< received packets has bad ip > checksum */ > unsigned int rx_bad_l4_csum ; /**< received packets has bad l4 > checksum */ > + unsigned int gro_times; /**< GRO operation times */ > #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES > uint64_t core_cycles; /**< used for RX and TX processing */ > #endif > @@ -206,6 +207,7 @@ struct rte_port { > */ > struct fwd_lcore { > struct rte_mempool *mbp; /**< The mbuf pool to use by this core */ > + void 
*gro_ctx; /**< GRO context */ > streamid_t stream_idx; /**< index of 1st stream in "fwd_streams" > */ > streamid_t stream_nb; /**< number of streams in "fwd_streams" > */ > lcoreid_t cpuid_idx; /**< index of logical core in CPU id table */ > @@ -434,13 +436,19 @@ extern struct ether_addr > peer_eth_addrs[RTE_MAX_ETHPORTS]; > extern uint32_t burst_tx_delay_time; /**< Burst tx delay time(us) for mac- > retry. */ > extern uint32_t burst_tx_retry_num; /**< Burst tx retry number for mac- > retry. */ > > -#define GRO_DEFAULT_FLOW_NUM 4 > -#define GRO_DEFAULT_ITEM_NUM_PER_FLOW DEF_PKT_BURST > +#define GRO_DEFAULT_ITEM_NUM_PER_FLOW 32 > +#define GRO_DEFAULT_FLOW_NUM (RTE_GRO_MAX_BURST_ITEM_NUM > / \ > + GRO_DEFAULT_ITEM_NUM_PER_FLOW) > + > +#define GRO_DEFAULT_FLUSH_CYCLES 1 > +#define GRO_MAX_FLUSH_CYCLES 4 > + > struct gro_status { > struct rte_gro_param param; > uint8_t enable; > }; > extern struct gro_status gro_ports[RTE_MAX_ETHPORTS]; > +extern uint8_t gro_flush_cycles; > > static inline unsigned int > lcore_num(void) > @@ -641,7 +649,9 @@ void get_2tuple_filter(uint8_t port_id, uint16_t > index); > void get_5tuple_filter(uint8_t port_id, uint16_t index); > int rx_queue_id_is_invalid(queueid_t rxq_id); > int tx_queue_id_is_invalid(queueid_t txq_id); > -void setup_gro(const char *mode, uint8_t port_id); > +void setup_gro(const char *onoff, uint8_t port_id); > +void setup_gro_flush_cycles(uint8_t cycles); > +void show_gro(uint8_t port_id); > > /* Functions to manage the set of filtered Multicast MAC addresses */ > void mcast_addr_add(uint8_t port_id, struct ether_addr *mc_addr); > diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst > b/doc/guides/testpmd_app_ug/testpmd_funcs.rst > index 2ed62f5..74a1fb4 100644 > --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst > +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst > @@ -898,12 +898,12 @@ Display the status of TCP Segmentation Offload:: > > testpmd> tso show (port_id) > > -gro > -~~~ > +set port - gro > +~~~~~~~~~~~~~~ > > Enable or disable GRO in ``csum`` forwarding engine:: > > - testpmd> gro (on|off) (port_id) > + testpmd> set port <port_id> gro on|off > > If enabled, the csum forwarding engine will perform GRO on the TCP/IPv4 > packets received from the given port. > @@ -914,23 +914,43 @@ GRO. By default, GRO is disabled for all ports. > .. note:: > > When enable GRO for a port, TCP/IPv4 packets received from the port > - will be performed GRO. After GRO, the merged packets are multi- > segments. > - But csum forwarding engine doesn't support to calculate TCP checksum > - for multi-segment packets in SW. So please select TCP HW checksum > - calculation for the port which GROed packets are transmitted to. > + will be performed GRO. After GRO, all merged packets have bad > + checksums, since the GRO library doesn't re-calculate checksums for > + the merged packets. Therefore, if users want the merged packets to > + have correct checksums, please select HW IP checksum calculation and > + HW TCP checksum calculation for the port which the merged packets are > + transmitted to. 
> + > +show port - gro > +~~~~~~~~~~~~~~~ > > -gro set > -~~~~~~~ > +Display GRO configuration for a given port:: > + > + testpmd> show port <port_id> gro > + > +set gro flush > +~~~~~~~~~~~~~ > + > +Set the cycle to flush the GROed packets from reassembly tables:: > + > + testpmd> set gro flush <cycles> > > -Set max flow number and max packet number per-flow for GRO:: > +When enable GRO, the csum forwarding engine performs GRO on received > +packets, and the GROed packets are stored in reassembly tables. Users > +can use this command to determine when the GROed packets are flushed > +from the reassembly tables. > > - testpmd> gro set (max_flow_num) (max_item_num_per_flow) (port_id) > +The ``cycles`` is measured in GRO operation times. The csum forwarding > +engine flushes the GROed packets from the tables every ``cycles`` GRO > +operations. > > -The product of ``max_flow_num`` and ``max_item_num_per_flow`` is the > max > -number of packets a GRO table can store. > +By default, the value of ``cycles`` is 1, which means flush GROed packets > +from the reassembly tables as soon as one GRO operation finishes. The > value > +of ``cycles`` should be in the range of 1 to ``GRO_MAX_FLUSH_CYCLES``. > > -If current packet number is greater than or equal to the max value, GRO > -will stop processing incoming packets. > +Please note that the large value of ``cycles`` may cause the poor TCP/IP > +stack performance. Because the GROed packets are delayed to arrive the > +stack, thus causing more duplicated ACKs and TCP retransmissions. > > mac_addr add > ~~~~~~~~~~~~ > -- > 2.7.4
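For readers who want to reuse the heavyweight GRO path outside of testpmd, the call flow that this patch wires into the csum engine boils down to the sketch below. It is only a condensed illustration modelled on the csumonly.c and testpmd.c changes above; the function name, the port/queue arguments, the burst size, the bounded loop count and the flush value are made-up example values, and error handling is reduced to a minimum.

    #include <stdint.h>

    #include <rte_ethdev.h>
    #include <rte_gro.h>
    #include <rte_lcore.h>
    #include <rte_mbuf.h>

    #define BURST        32
    #define FLUSH_CYCLES 2  /* example value, mirrors "set gro flush 2" */

    /* Illustrative RX -> heavyweight GRO -> TX loop for one queue pair. */
    static void
    gro_heavyweight_loop(uint8_t rx_port, uint8_t tx_port, uint16_t queue)
    {
        struct rte_mbuf *pkts[2 * BURST];
        struct rte_gro_param param = {
            .gro_types = RTE_GRO_TCP_IPV4,
            .max_flow_num = 4,              /* example table sizing */
            .max_item_per_flow = BURST,
            .socket_id = (uint16_t)rte_socket_id(),
        };
        void *gro_ctx = rte_gro_ctx_create(&param);
        unsigned int gro_times = 0;
        uint16_t nb, sent;
        int i;

        if (gro_ctx == NULL)
            return;

        for (i = 0; i < 1000000; i++) {
            nb = rte_eth_rx_burst(rx_port, queue, pkts, BURST);
            if (nb == 0)
                continue;

            /* Merge the burst into the reassembly tables; packets that
             * cannot be processed are handed back right away.
             */
            nb = rte_gro_reassemble(pkts, nb, gro_ctx);

            /* Flush the merged packets every FLUSH_CYCLES bursts, like
             * the csum engine does when the flush cycle is larger than 1.
             */
            if (++gro_times >= FLUSH_CYCLES) {
                nb += rte_gro_timeout_flush(gro_ctx, 0, RTE_GRO_TCP_IPV4,
                        &pkts[nb], (uint16_t)(2 * BURST - nb));
                gro_times = 0;
            }

            /* Merged packets carry stale checksums, so the TX port should
             * have HW IP/TCP checksum calculation enabled.
             */
            sent = rte_eth_tx_burst(tx_port, queue, pkts, nb);
            while (sent < nb)
                rte_pktmbuf_free(pkts[sent++]);
        }

        rte_gro_ctx_destroy(gro_ctx);
    }

A larger flush cycle gives the library more packets to merge per flush, which is what drives the 25.9 vs 27.9 Gbps difference above, at the cost of delaying packets and, as the updated testpmd documentation warns, potentially triggering duplicated ACKs and TCP retransmissions.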