Hi Jianfeng, > -----Original Message----- > From: Tan, Jianfeng > Sent: Tuesday, March 01, 2016 1:24 AM > To: dev at dpdk.org > Cc: Zhang, Helin; Ananyev, Konstantin; nelio.laranjeiro at 6wind.com; > adrien.mazarguil at 6wind.com; rahul.lakkireddy at chelsio.com; > pmatilai at redhat.com; Tan, Jianfeng > Subject: [PATCH] examples/l3fwd: fix using packet type blindly > > As a example to use ptype info, l3fwd needs firstly to use > rte_eth_dev_get_ptype_info() API to check if device and/or > its PMD driver will parse and fill the needed packet type; > if not, use the newly added option, --parse-ptype, to > analyze it in the callback softly. > > As the mode of EXACT_MATCH uses the 5 tuples to caculate > hash, so we narrow down its scope to: > a. ip packets with no extensions, and > b. L4 payload should be either tcp or udp. > > Note: this patch does not completely solve the issue, "cannot > run l3fwd on virtio or other devices", because hw_ip_checksum > may be not supported by the devices. Currently we can: option > 1, remove this requirements; option 2, wait for virtio front > end (pmd) to support it. > > Signed-off-by: Jianfeng Tan <jianfeng.tan at intel.com> > --- > doc/guides/rel_notes/release_16_04.rst | 5 ++ > doc/guides/sample_app_ug/l3_forward.rst | 6 ++- > examples/l3fwd/l3fwd.h | 12 +++++ > examples/l3fwd/l3fwd_em.c | 94 > +++++++++++++++++++++++++++++++++ > examples/l3fwd/l3fwd_lpm.c | 57 ++++++++++++++++++++ > examples/l3fwd/main.c | 50 ++++++++++++++++++ > 6 files changed, 223 insertions(+), 1 deletion(-) > > diff --git a/doc/guides/rel_notes/release_16_04.rst > b/doc/guides/rel_notes/release_16_04.rst > index 64e913d..4d6260e 100644 > --- a/doc/guides/rel_notes/release_16_04.rst > +++ b/doc/guides/rel_notes/release_16_04.rst > @@ -68,6 +68,11 @@ This section should contain bug fixes added to the > relevant sections. Sample for > vhost-switch often fails to allocate mbuf when dequeue from vring because > it > wrongly calculates the number of mbufs needed. > > +* **examples/l3fwd: Fixed using packet type blindly.** > + > + l3fwd makes use of packet type information without even query if devices > or PMDs > + really set it. For those don't set ptypes, add an option to parse it > softly. > + > > EAL > ~~~ > diff --git a/doc/guides/sample_app_ug/l3_forward.rst > b/doc/guides/sample_app_ug/l3_forward.rst > index 4ce734b..e0c22e3 100644 > --- a/doc/guides/sample_app_ug/l3_forward.rst > +++ b/doc/guides/sample_app_ug/l3_forward.rst > @@ -93,7 +93,7 @@ The application has a number of command line options: > > .. code-block:: console > > - ./build/l3fwd [EAL options] -- -p PORTMASK [-P] > --config(port,queue,lcore)[,(port,queue,lcore)] [--enable-jumbo [--max-pkt-len > PKTLEN]] [--no-numa][--hash-entry-num][--ipv6] > + ./build/l3fwd [EAL options] -- -p PORTMASK [-P] > --config(port,queue,lcore)[,(port,queue,lcore)] [--enable-jumbo [--max-pkt-len > PKTLEN]] [--no-numa][--hash-entry-num][--ipv6] [--parse-ptype] > > where, > > @@ -114,6 +114,8 @@ where, > > * --ipv6: optional, set it if running ipv6 packets > > +* --parse-ptype: optional, set it if use software way to analyze packet > type > + > For example, consider a dual processor socket platform where cores 0-7 and > 16-23 appear on socket 0, while cores 8-15 and 24-31 > appear on socket 1. > Let's say that the programmer wants to use memory from both NUMA nodes, the > platform has only two ports, one connected to > each NUMA node, > and the programmer wants to use two cores from each processor socket to do > the packet processing. > @@ -334,6 +336,8 @@ The key code snippet of simple_ipv4_fwd_4pkts() is shown > below: > > The simple_ipv6_fwd_4pkts() function is similar to the > simple_ipv4_fwd_4pkts() function. > > +Known issue: IP packets with extensions or IP packets which are not TCP/UDP > cannot work well with this mode. > + > Packet Forwarding for LPM-based Lookups > ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h > index da6d369..966c83b 100644 > --- a/examples/l3fwd/l3fwd.h > +++ b/examples/l3fwd/l3fwd.h > @@ -198,6 +198,18 @@ void > setup_hash(const int socketid); > > int > +em_check_ptype(int portid); > + > +int > +lpm_check_ptype(int portid); > + > +void > +em_parse_ptype(struct rte_mbuf *); > + > +void > +lpm_parse_ptype(struct rte_mbuf *); > + > +int > em_main_loop(__attribute__((unused)) void *dummy); > > int > diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c > index f6a65d8..1061baf 100644 > --- a/examples/l3fwd/l3fwd_em.c > +++ b/examples/l3fwd/l3fwd_em.c > @@ -42,6 +42,7 @@ > #include <errno.h> > #include <getopt.h> > #include <stdbool.h> > +#include <netinet/in.h> > > #include <rte_debug.h> > #include <rte_ether.h> > @@ -508,6 +509,99 @@ populate_ipv6_many_flow_into_table(const struct rte_hash > *h, > printf("Hash: Adding 0x%x keys\n", nr_flow); > } > > +/* Requirements: > + * 1. IP packets without extension; > + * 2. L4 payload should be either TCP or UDP. > + */ > +int > +em_check_ptype(int portid) > +{ > + int i, ret; > + int ptype_l3_ipv4_ext = 0; > + int ptype_l3_ipv6_ext = 0; > + int ptype_l4_tcp = 0; > + int ptype_l4_udp = 0; > + > + ret = rte_eth_dev_get_ptype_info(portid, > + RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK, > + NULL, 0); > + if (ret <= 0) > + return 0; > + > + uint32_t ptypes[ret]; > + > + ret = rte_eth_dev_get_ptype_info(portid, > + RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK, > + ptypes, ret); > + for (i = 0; i < ret; ++i) { > + switch (ptypes[i]) { > + case RTE_PTYPE_L3_IPV4_EXT: > + ptype_l3_ipv4_ext = 1; > + break; > + case RTE_PTYPE_L3_IPV6_EXT: > + ptype_l3_ipv6_ext = 1; > + break; > + case RTE_PTYPE_L4_TCP: > + ptype_l4_tcp = 1; > + break; > + case RTE_PTYPE_L4_UDP: > + ptype_l4_udp = 1; > + break; > + } > + } > + > + if (ptype_l3_ipv4_ext == 0) > + printf("port %d cannot parse RTE_PTYPE_L3_IPV4_EXT\n", portid); > + if (ptype_l3_ipv6_ext == 0) > + printf("port %d cannot parse RTE_PTYPE_L3_IPV6_EXT\n", portid); > + if (ptype_l3_ipv4_ext && ptype_l3_ipv6_ext) > + return 1;
Why return here? You'll miss L4 ptype checks below. > + > + if (ptype_l4_tcp == 0) > + printf("port %d cannot parse RTE_PTYPE_L4_TCP\n", portid); > + if (ptype_l4_udp == 0) > + printf("port %d cannot parse RTE_PTYPE_L4_UDP\n", portid); > + if (ptype_l4_tcp || ptype_l4_udp) > + return 1; > + > + return 0; > +} > + > +void > +em_parse_ptype(struct rte_mbuf *m) > +{ > + struct ether_hdr *eth_hdr; > + uint32_t packet_type = 0; > + uint16_t ethertype; > + void *l4; > + int hdr_len; > + struct ipv4_hdr *ipv4_hdr; > + struct ipv6_hdr *ipv6_hdr; > + > + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); > + ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); > + l4 = (uint8_t *)eth_hdr + sizeof(struct ether_hdr); Just curious why l4? It looks like l3 :) > + switch (ethertype) { > + case ETHER_TYPE_IPv4: > + ipv4_hdr = (struct ipv4_hdr *)l4; > + hdr_len = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) * > + IPV4_IHL_MULTIPLIER; > + if (hdr_len == sizeof(struct ipv4_hdr) && > + (ipv4_hdr->next_proto_id == IPPROTO_TCP || > + ipv4_hdr->next_proto_id == IPPROTO_UDP)) > + packet_type |= RTE_PTYPE_L3_IPV4; I think it needs to be something like: If (hdr_len == sizeof(struct ipv4_hdr)) { packet_type = RTE_PTYPE_L3_IPV4; if (ipv4_hdr->next_proto_id == IPPROTO_TCP) packet_type |= RTE_PTYPE_L4_TCP; else if ipv4_hdr->next_proto_id == IPPROTO_UDP) packet_type |= RTE_PTYPE_L4_TCP; } And then inside em forward check ptype to be sure that is IPV4 with no options and UDP/TCP packet. Same for IPv6. > + break; > + case ETHER_TYPE_IPv6: > + ipv6_hdr = (struct ipv6_hdr *)l4; > + if (ipv6_hdr->proto == IPPROTO_TCP || > + ipv6_hdr->proto == IPPROTO_UDP) > + packet_type |= RTE_PTYPE_L3_IPV6; > + break; > + } > + > + m->packet_type |= packet_type; > +} > + > /* main processing loop */ > int > em_main_loop(__attribute__((unused)) void *dummy) > diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c > index e0ed3c4..981227a 100644 > --- a/examples/l3fwd/l3fwd_lpm.c > +++ b/examples/l3fwd/l3fwd_lpm.c > @@ -280,6 +280,63 @@ setup_lpm(const int socketid) > } > } > > +int > +lpm_check_ptype(int portid) > +{ > + int i, ret; > + int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0; > + > + ret = rte_eth_dev_get_ptype_info(portid, RTE_PTYPE_L3_MASK, NULL, 0); > + if (ret <= 0) > + return 0; > + > + uint32_t ptypes[ret]; > + > + ret = rte_eth_dev_get_ptype_info(portid, RTE_PTYPE_L3_MASK, > + ptypes, ret); > + for (i = 0; i < ret; ++i) { > + if (ptypes[i] & RTE_PTYPE_L3_IPV4) > + ptype_l3_ipv4 = 1; > + if (ptypes[i] & RTE_PTYPE_L3_IPV6) > + ptype_l3_ipv6 = 1; > + } > + > + if (ptype_l3_ipv4 == 0) > + printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid); > + > + if (ptype_l3_ipv6 == 0) > + printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid); > + > + if (ptype_l3_ipv4 && ptype_l3_ipv6) > + return 1; > + > + return 0; > + > +} > + > +void > +lpm_parse_ptype(struct rte_mbuf *m) > +{ > + struct ether_hdr *eth_hdr; > + uint32_t packet_type = 0; > + uint16_t ethertype; > + > + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); > + ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); > + switch (ethertype) { > + case ETHER_TYPE_IPv4: > + packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; > + break; > + case ETHER_TYPE_IPv6: > + packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; > + break; > + default: > + break; > + } > + > + m->packet_type |= packet_type; Might be safer: m->packet_type = packet_type; > +} > + > /* Return ipv4/ipv6 lpm fwd lookup struct. */ > void * > lpm_get_ipv4_l3fwd_lookup_struct(const int socketid) > diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c > index 0e33039..8889828 100644 > --- a/examples/l3fwd/main.c > +++ b/examples/l3fwd/main.c > @@ -103,6 +103,8 @@ static int l3fwd_lpm_on; > static int l3fwd_em_on; > > static int numa_on = 1; /**< NUMA is enabled by default. */ > +static int parse_ptype; /**< Parse packet type using rx callback, and */ > + /**< disabled by default */ > > /* Global variables. */ > > @@ -172,6 +174,8 @@ static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; > > struct l3fwd_lkp_mode { > void (*setup)(int); > + int (*check_ptype)(int); > + void (*parse_ptype)(struct rte_mbuf *); > int (*main_loop)(void *); > void* (*get_ipv4_lookup_struct)(int); > void* (*get_ipv6_lookup_struct)(int); > @@ -181,6 +185,8 @@ static struct l3fwd_lkp_mode l3fwd_lkp; > > static struct l3fwd_lkp_mode l3fwd_em_lkp = { > .setup = setup_hash, > + .check_ptype = em_check_ptype, > + .parse_ptype = em_parse_ptype, > .main_loop = em_main_loop, > .get_ipv4_lookup_struct = em_get_ipv4_l3fwd_lookup_struct, > .get_ipv6_lookup_struct = em_get_ipv6_l3fwd_lookup_struct, > @@ -188,6 +194,8 @@ static struct l3fwd_lkp_mode l3fwd_em_lkp = { > > static struct l3fwd_lkp_mode l3fwd_lpm_lkp = { > .setup = setup_lpm, > + .check_ptype = lpm_check_ptype, > + .parse_ptype = lpm_parse_ptype, > .main_loop = lpm_main_loop, > .get_ipv4_lookup_struct = lpm_get_ipv4_l3fwd_lookup_struct, > .get_ipv6_lookup_struct = lpm_get_ipv6_l3fwd_lookup_struct, > @@ -209,6 +217,22 @@ setup_l3fwd_lookup_tables(void) > l3fwd_lkp = l3fwd_lpm_lkp; > } > > +static uint16_t > +cb_parse_packet_type(uint8_t port __rte_unused, > + uint16_t queue __rte_unused, > + struct rte_mbuf *pkts[], > + uint16_t nb_pkts, > + uint16_t max_pkts __rte_unused, > + void *user_param __rte_unused) > +{ > + unsigned i; > + > + for (i = 0; i < nb_pkts; ++i) > + l3fwd_lkp.parse_ptype(pkts[i]); No need to create callback chains. That way you have extra call per packet. Just 2 callbaclks: cb_lpm_parse_ptype & cb_em_parse_ptype, and then register one depending on which mode are we in. Would be simpler and faster, I believe. Konstantin > + > + return nb_pkts; > +} > + > static int > check_lcore_params(void) > { > @@ -456,6 +480,7 @@ parse_eth_dest(const char *optarg) > #define CMD_LINE_OPT_IPV6 "ipv6" > #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo" > #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num" > +#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" > > /* > * This expression is used to calculate the number of mbufs needed > @@ -486,6 +511,7 @@ parse_args(int argc, char **argv) > {CMD_LINE_OPT_IPV6, 0, 0, 0}, > {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0}, > {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0}, > + {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, > {NULL, 0, 0, 0} > }; > > @@ -612,6 +638,14 @@ parse_args(int argc, char **argv) > return -1; > } > } > + > + if (!strncmp(lgopts[option_index].name, > + CMD_LINE_OPT_PARSE_PTYPE, > + sizeof(CMD_LINE_OPT_PARSE_PTYPE))) { > + printf("soft parse-ptype is enabled\n"); > + parse_ptype = 1; > + } > + > break; > > default: > @@ -938,6 +972,22 @@ main(int argc, char **argv) > rte_exit(EXIT_FAILURE, > "rte_eth_rx_queue_setup: err=%d, port=%d\n", > ret, portid); > + > + ret = l3fwd_lkp.check_ptype(portid); > + if (ret) > + continue; > + if (!parse_ptype) > + rte_exit(EXIT_FAILURE, > + "port %d cannot parse packet type, > please add --%s\n", > + portid, CMD_LINE_OPT_PARSE_PTYPE); > + > + if (rte_eth_add_rx_callback(portid, queueid, > + cb_parse_packet_type, > + NULL)) > + continue; > + rte_exit(EXIT_FAILURE, > + "Failed to add rx callback: port=%d\n", > + portid); > } > } > > -- > 2.1.4