Acked-by: Nithin Dabilpuram <ndabilpu...@marvell.com>
On Wed, May 31, 2023 at 5:08 PM Amit Prakash Shukla <amitpraka...@marvell.com> wrote: > > From: Sunil Kumar Kori <sk...@marvell.com> > > Similar to IPv4 lookup node, patch adds IPv6 lookup > node. > > Signed-off-by: Sunil Kumar Kori <sk...@marvell.com> > Signed-off-by: Amit Prakash Shukla <amitpraka...@marvell.com> > --- > v2: > - Performance related changes > > v3: > - Removing redundant dynamic variable > > doc/guides/prog_guide/graph_lib.rst | 13 + > lib/node/ip6_lookup.c | 372 ++++++++++++++++++++++++++++ > lib/node/meson.build | 3 +- > lib/node/node_private.h | 2 +- > lib/node/pkt_cls.c | 14 ++ > lib/node/pkt_cls_priv.h | 1 + > lib/node/rte_node_ip6_api.h | 80 ++++++ > lib/node/version.map | 2 + > 8 files changed, 485 insertions(+), 2 deletions(-) > create mode 100644 lib/node/ip6_lookup.c > create mode 100644 lib/node/rte_node_ip6_api.h > > diff --git a/doc/guides/prog_guide/graph_lib.rst > b/doc/guides/prog_guide/graph_lib.rst > index 1cfdc86433..1f70d63628 100644 > --- a/doc/guides/prog_guide/graph_lib.rst > +++ b/doc/guides/prog_guide/graph_lib.rst > @@ -388,6 +388,19 @@ to determine the L2 header to be written to the packet > before sending > the packet out to a particular ethdev_tx node. > ``rte_node_ip4_rewrite_add()`` is control path API to add next-hop info. > > +ip6_lookup > +~~~~~~~~~~ > +This node is an intermediate node that does LPM lookup for the received > +ipv6 packets and the result determines each packet's next node. > + > +On successful LPM lookup, the result contains the ``next_node`` id and > +``next-hop`` id with which the packet needs to be further processed. > + > +On LPM lookup failure, objects are redirected to pkt_drop node. > +``rte_node_ip6_route_add()`` is control path API to add ipv6 routes. > +To achieve home run, the node uses ``rte_node_next_stream_move()`` as mentioned in the > above > +sections. 
> + > null > ~~~~ > This node ignores the set of objects passed to it and reports that all are > diff --git a/lib/node/ip6_lookup.c b/lib/node/ip6_lookup.c > new file mode 100644 > index 0000000000..e4bbc7ed67 > --- /dev/null > +++ b/lib/node/ip6_lookup.c > @@ -0,0 +1,372 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2023 Marvell. > + */ > + > +#include <arpa/inet.h> > +#include <sys/socket.h> > + > +#include <rte_ethdev.h> > +#include <rte_ether.h> > +#include <rte_graph.h> > +#include <rte_graph_worker.h> > +#include <rte_ip.h> > +#include <rte_lpm6.h> > + > +#include "rte_node_ip6_api.h" > + > +#include "node_private.h" > + > +#define IPV6_L3FWD_LPM_MAX_RULES 1024 > +#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 8) > + > +/* IP6 Lookup global data struct */ > +struct ip6_lookup_node_main { > + struct rte_lpm6 *lpm_tbl[RTE_MAX_NUMA_NODES]; > +}; > + > +struct ip6_lookup_node_ctx { > + /* Socket's LPM table */ > + struct rte_lpm6 *lpm6; > + /* Dynamic offset to mbuf priv1 */ > + int mbuf_priv1_off; > +}; > + > +static struct ip6_lookup_node_main ip6_lookup_nm; > + > +#define IP6_LOOKUP_NODE_LPM(ctx) \ > + (((struct ip6_lookup_node_ctx *)ctx)->lpm6) > + > +#define IP6_LOOKUP_NODE_PRIV1_OFF(ctx) \ > + (((struct ip6_lookup_node_ctx *)ctx)->mbuf_priv1_off) > + > +static uint16_t > +ip6_lookup_node_process_scalar(struct rte_graph *graph, struct rte_node > *node, > + void **objs, uint16_t nb_objs) > +{ > + struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; > + struct rte_lpm6 *lpm6 = IP6_LOOKUP_NODE_LPM(node->ctx); > + const int dyn = IP6_LOOKUP_NODE_PRIV1_OFF(node->ctx); > + struct rte_ipv6_hdr *ipv6_hdr; > + void **to_next, **from; > + uint16_t last_spec = 0; > + rte_edge_t next_index; > + uint16_t n_left_from; > + uint16_t held = 0; > + uint32_t drop_nh; > + int i, rc; > + > + /* Speculative next */ > + next_index = RTE_NODE_IP6_LOOKUP_NEXT_REWRITE; > + /* Drop node */ > + drop_nh = ((uint32_t)RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP) << 16; > + > + 
pkts = (struct rte_mbuf **)objs; > + from = objs; > + n_left_from = nb_objs; > + > + for (i = OBJS_PER_CLINE; i < RTE_GRAPH_BURST_SIZE; i += > OBJS_PER_CLINE) > + rte_prefetch0(&objs[i]); > + > + for (i = 0; i < 4 && i < n_left_from; i++) > + rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *, > + sizeof(struct > rte_ether_hdr))); > + > + /* Get stream for the speculated next node */ > + to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs); > + while (n_left_from >= 4) { > + uint8_t ip_batch[4][16]; > + int32_t next_hop[4]; > + uint16_t next[4]; > + > +#if RTE_GRAPH_BURST_SIZE > 64 > + /* Prefetch next-next mbufs */ > + if (likely(n_left_from > 11)) { > + rte_prefetch0(pkts[8]); > + rte_prefetch0(pkts[9]); > + rte_prefetch0(pkts[10]); > + rte_prefetch0(pkts[11]); > + } > +#endif > + /* Prefetch next mbuf data */ > + if (likely(n_left_from > 7)) { > + rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *, > + sizeof(struct > rte_ether_hdr))); > + rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *, > + sizeof(struct > rte_ether_hdr))); > + rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *, > + sizeof(struct > rte_ether_hdr))); > + rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *, > + sizeof(struct > rte_ether_hdr))); > + } > + > + mbuf0 = pkts[0]; > + mbuf1 = pkts[1]; > + mbuf2 = pkts[2]; > + mbuf3 = pkts[3]; > + > + pkts += 4; > + n_left_from -= 4; > + > + /* Extract DIP of mbuf0 */ > + ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv6_hdr > *, > + sizeof(struct rte_ether_hdr)); > + /* Extract hop_limits as ipv6 hdr is in cache */ > + node_mbuf_priv1(mbuf0, dyn)->ttl = ipv6_hdr->hop_limits; > + rte_memcpy(ip_batch[0], ipv6_hdr->dst_addr, 16); > + > + /* Extract DIP of mbuf1 */ > + ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv6_hdr > *, > + sizeof(struct rte_ether_hdr)); > + /* Extract hop_limits as ipv6 hdr is in cache */ > + node_mbuf_priv1(mbuf1, dyn)->ttl = ipv6_hdr->hop_limits; > + rte_memcpy(ip_batch[1], 
ipv6_hdr->dst_addr, 16); > + > + /* Extract DIP of mbuf2 */ > + ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv6_hdr > *, > + sizeof(struct rte_ether_hdr)); > + /* Extract hop_limits as ipv6 hdr is in cache */ > + node_mbuf_priv1(mbuf2, dyn)->ttl = ipv6_hdr->hop_limits; > + rte_memcpy(ip_batch[2], ipv6_hdr->dst_addr, 16); > + > + /* Extract DIP of mbuf3 */ > + ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv6_hdr > *, > + sizeof(struct rte_ether_hdr)); > + /* Extract hop_limits as ipv6 hdr is in cache */ > + node_mbuf_priv1(mbuf3, dyn)->ttl = ipv6_hdr->hop_limits; > + rte_memcpy(ip_batch[3], ipv6_hdr->dst_addr, 16); > + > + rc = rte_lpm6_lookup_bulk_func(lpm6, ip_batch, next_hop, 4); > + > + next_hop[0] = (next_hop[0] < 0) ? (int32_t)drop_nh : > next_hop[0]; > + node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop[0]; > + next[0] = (uint16_t)(next_hop[0] >> 16); > + > + next_hop[1] = (next_hop[1] < 0) ? (int32_t)drop_nh : > next_hop[1]; > + node_mbuf_priv1(mbuf1, dyn)->nh = (uint16_t)next_hop[1]; > + next[1] = (uint16_t)(next_hop[1] >> 16); > + > + next_hop[2] = (next_hop[2] < 0) ? (int32_t)drop_nh : > next_hop[2]; > + node_mbuf_priv1(mbuf2, dyn)->nh = (uint16_t)next_hop[2]; > + next[2] = (uint16_t)(next_hop[2] >> 16); > + > + next_hop[3] = (next_hop[3] < 0) ? 
(int32_t)drop_nh : > next_hop[3]; > + node_mbuf_priv1(mbuf3, dyn)->nh = (uint16_t)next_hop[3]; > + next[3] = (uint16_t)(next_hop[3] >> 16); > + > + rte_edge_t fix_spec = ((next_index == next[0]) && > + (next_index == next[1]) && > + (next_index == next[2]) && > + (next_index == next[3])); > + > + if (unlikely(fix_spec == 0)) { > + /* Copy things successfully speculated till now */ > + rte_memcpy(to_next, from, last_spec * > sizeof(from[0])); > + from += last_spec; > + to_next += last_spec; > + held += last_spec; > + last_spec = 0; > + > + /* Next0 */ > + if (next_index == next[0]) { > + to_next[0] = from[0]; > + to_next++; > + held++; > + } else { > + rte_node_enqueue_x1(graph, node, next[0], > from[0]); > + } > + > + /* Next1 */ > + if (next_index == next[1]) { > + to_next[0] = from[1]; > + to_next++; > + held++; > + } else { > + rte_node_enqueue_x1(graph, node, next[1], > from[1]); > + } > + > + /* Next2 */ > + if (next_index == next[2]) { > + to_next[0] = from[2]; > + to_next++; > + held++; > + } else { > + rte_node_enqueue_x1(graph, node, next[2], > from[2]); > + } > + > + /* Next3 */ > + if (next_index == next[3]) { > + to_next[0] = from[3]; > + to_next++; > + held++; > + } else { > + rte_node_enqueue_x1(graph, node, next[3], > from[3]); > + } > + > + from += 4; > + } else { > + last_spec += 4; > + } > + } > + > + while (n_left_from > 0) { > + uint32_t next_hop; > + uint16_t next0; > + > + mbuf0 = pkts[0]; > + > + pkts += 1; > + n_left_from -= 1; > + > + /* Extract DIP of mbuf0 */ > + ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv6_hdr > *, > + sizeof(struct rte_ether_hdr)); > + /* Extract ttl as ipv6 hdr is in cache */ > + node_mbuf_priv1(mbuf0, dyn)->ttl = ipv6_hdr->hop_limits; > + > + rc = rte_lpm6_lookup(lpm6, ipv6_hdr->dst_addr, &next_hop); > + next_hop = (rc == 0) ? 
next_hop : drop_nh; > + > + node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop; > + next_hop = next_hop >> 16; > + next0 = (uint16_t)next_hop; > + > + if (unlikely(next_index ^ next0)) { > + /* Copy things successfully speculated till now */ > + rte_memcpy(to_next, from, last_spec * > sizeof(from[0])); > + from += last_spec; > + to_next += last_spec; > + held += last_spec; > + last_spec = 0; > + > + rte_node_enqueue_x1(graph, node, next0, from[0]); > + from += 1; > + } else { > + last_spec += 1; > + } > + } > + > + /* !!! Home run !!! */ > + if (likely(last_spec == nb_objs)) { > + rte_node_next_stream_move(graph, node, next_index); > + return nb_objs; > + } > + held += last_spec; > + rte_memcpy(to_next, from, last_spec * sizeof(from[0])); > + rte_node_next_stream_put(graph, node, next_index, held); > + > + return nb_objs; > +} > + > +int > +rte_node_ip6_route_add(const uint8_t *ip, uint8_t depth, uint16_t next_hop, > + enum rte_node_ip6_lookup_next next_node) > +{ > + char abuf[INET6_ADDRSTRLEN]; > + struct in6_addr in6; > + uint8_t socket; > + uint32_t val; > + int ret; > + > + memcpy(in6.s6_addr, ip, RTE_LPM6_IPV6_ADDR_SIZE); > + inet_ntop(AF_INET6, &in6, abuf, sizeof(abuf)); > + /* Embedded next node id into 24 bit next hop */ > + val = ((next_node << 16) | next_hop) & ((1ull << 24) - 1); > + node_dbg("ip6_lookup", "LPM: Adding route %s / %d nh (0x%x)", abuf, > + depth, val); > + > + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { > + if (!ip6_lookup_nm.lpm_tbl[socket]) > + continue; > + > + ret = rte_lpm6_add(ip6_lookup_nm.lpm_tbl[socket], ip, depth, > + val); > + if (ret < 0) { > + node_err("ip6_lookup", > + "Unable to add entry %s / %d nh (%x) to LPM " > + "table on sock %d, rc=%d\n", > + abuf, depth, val, socket, ret); > + return ret; > + } > + } > + > + return 0; > +} > + > +static int > +setup_lpm6(struct ip6_lookup_node_main *nm, int socket) > +{ > + struct rte_lpm6_config config_ipv6; > + char s[RTE_LPM6_NAMESIZE]; > + > + /* One LPM table 
per socket */ > + if (nm->lpm_tbl[socket]) > + return 0; > + > + /* create the LPM table */ > + config_ipv6.max_rules = IPV6_L3FWD_LPM_MAX_RULES; > + config_ipv6.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S; > + config_ipv6.flags = 0; > + snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socket); > + nm->lpm_tbl[socket] = rte_lpm6_create(s, socket, &config_ipv6); > + if (nm->lpm_tbl[socket] == NULL) > + return -rte_errno; > + > + return 0; > +} > + > +static int > +ip6_lookup_node_init(const struct rte_graph *graph, struct rte_node *node) > +{ > + uint16_t socket, lcore_id; > + static uint8_t init_once; > + int rc; > + > + RTE_SET_USED(graph); > + RTE_BUILD_BUG_ON(sizeof(struct ip6_lookup_node_ctx) > > RTE_NODE_CTX_SZ); > + > + if (!init_once) { > + node_mbuf_priv1_dynfield_offset = > + rte_mbuf_dynfield_register( > + &node_mbuf_priv1_dynfield_desc); > + if (node_mbuf_priv1_dynfield_offset < 0) > + return -rte_errno; > + > + /* Setup LPM tables for all sockets */ > + RTE_LCORE_FOREACH(lcore_id) > + { > + socket = rte_lcore_to_socket_id(lcore_id); > + rc = setup_lpm6(&ip6_lookup_nm, socket); > + if (rc) { > + node_err("ip6_lookup", > + "Failed to setup lpm6 tbl for " > + "sock %u, rc=%d", socket, rc); > + return rc; > + } > + } > + init_once = 1; > + } > + > + /* Update socket's LPM and mbuf dyn priv1 offset in node ctx */ > + IP6_LOOKUP_NODE_LPM(node->ctx) = ip6_lookup_nm.lpm_tbl[graph->socket]; > + IP6_LOOKUP_NODE_PRIV1_OFF(node->ctx) = > + node_mbuf_priv1_dynfield_offset; > + > + node_dbg("ip6_lookup", "Initialized ip6_lookup node"); > + > + return 0; > +} > + > +static struct rte_node_register ip6_lookup_node = { > + .process = ip6_lookup_node_process_scalar, > + .name = "ip6_lookup", > + > + .init = ip6_lookup_node_init, > + > + .nb_edges = RTE_NODE_IP6_LOOKUP_NEXT_MAX, > + .next_nodes = { > + [RTE_NODE_IP6_LOOKUP_NEXT_REWRITE] = "ip6_rewrite", > + [RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP] = "pkt_drop", > + }, > +}; > + > +RTE_NODE_REGISTER(ip6_lookup_node); > diff --git 
a/lib/node/meson.build b/lib/node/meson.build > index dbdf673c86..cd30847a0b 100644 > --- a/lib/node/meson.build > +++ b/lib/node/meson.build > @@ -12,13 +12,14 @@ sources = files( > 'ethdev_rx.c', > 'ethdev_tx.c', > 'ip4_lookup.c', > + 'ip6_lookup.c', > 'ip4_rewrite.c', > 'log.c', > 'null.c', > 'pkt_cls.c', > 'pkt_drop.c', > ) > -headers = files('rte_node_ip4_api.h', 'rte_node_eth_api.h') > +headers = files('rte_node_ip4_api.h', 'rte_node_ip6_api.h', > 'rte_node_eth_api.h') > # Strict-aliasing rules are violated by uint8_t[] to context size casts. > cflags += '-fno-strict-aliasing' > deps += ['graph', 'mbuf', 'lpm', 'ethdev', 'mempool', 'cryptodev'] > diff --git a/lib/node/node_private.h b/lib/node/node_private.h > index 8c73d5dc10..26135aaa5b 100644 > --- a/lib/node/node_private.h > +++ b/lib/node/node_private.h > @@ -26,7 +26,7 @@ extern int rte_node_logtype; > */ > struct node_mbuf_priv1 { > union { > - /* IP4 rewrite */ > + /* IP4/IP6 rewrite */ > struct { > uint16_t nh; > uint16_t ttl; > diff --git a/lib/node/pkt_cls.c b/lib/node/pkt_cls.c > index 3e75f2cf78..a8302b8d28 100644 > --- a/lib/node/pkt_cls.c > +++ b/lib/node/pkt_cls.c > @@ -24,6 +24,19 @@ static const uint8_t p_nxt[256] __rte_cache_aligned = { > > [RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L2_ETHER] = > PKT_CLS_NEXT_IP4_LOOKUP, > + > + [RTE_PTYPE_L3_IPV6] = PKT_CLS_NEXT_IP6_LOOKUP, > + > + [RTE_PTYPE_L3_IPV6_EXT] = PKT_CLS_NEXT_IP6_LOOKUP, > + > + [RTE_PTYPE_L3_IPV6_EXT_UNKNOWN] = PKT_CLS_NEXT_IP6_LOOKUP, > + > + [RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER] = PKT_CLS_NEXT_IP6_LOOKUP, > + > + [RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER] = > PKT_CLS_NEXT_IP6_LOOKUP, > + > + [RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L2_ETHER] = > + PKT_CLS_NEXT_IP6_LOOKUP, > }; > > static uint16_t > @@ -216,6 +229,7 @@ struct rte_node_register pkt_cls_node = { > /* Pkt drop node starts at '0' */ > [PKT_CLS_NEXT_PKT_DROP] = "pkt_drop", > [PKT_CLS_NEXT_IP4_LOOKUP] = "ip4_lookup", > + [PKT_CLS_NEXT_IP6_LOOKUP] = 
"ip6_lookup", > }, > }; > RTE_NODE_REGISTER(pkt_cls_node); > diff --git a/lib/node/pkt_cls_priv.h b/lib/node/pkt_cls_priv.h > index 6f5374f0be..16135807a1 100644 > --- a/lib/node/pkt_cls_priv.h > +++ b/lib/node/pkt_cls_priv.h > @@ -13,6 +13,7 @@ struct pkt_cls_node_ctx { > enum pkt_cls_next_nodes { > PKT_CLS_NEXT_PKT_DROP, > PKT_CLS_NEXT_IP4_LOOKUP, > + PKT_CLS_NEXT_IP6_LOOKUP, > PKT_CLS_NEXT_MAX, > }; > > diff --git a/lib/node/rte_node_ip6_api.h b/lib/node/rte_node_ip6_api.h > new file mode 100644 > index 0000000000..1696ed154d > --- /dev/null > +++ b/lib/node/rte_node_ip6_api.h > @@ -0,0 +1,80 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2023 Marvell. > + */ > + > +#ifndef __INCLUDE_RTE_NODE_IP6_API_H__ > +#define __INCLUDE_RTE_NODE_IP6_API_H__ > + > +/** > + * @file rte_node_ip6_api.h > + * > + * @warning > + * @b EXPERIMENTAL: > + * All functions in this file may be changed or removed without prior notice. > + * > + * This API allows to do control path functions of ip6_* nodes > + * like ip6_lookup, ip6_rewrite. > + * > + */ > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include <rte_common.h> > +#include <rte_compat.h> > + > +/** > + * IP6 lookup next nodes. > + */ > +enum rte_node_ip6_lookup_next { > + RTE_NODE_IP6_LOOKUP_NEXT_REWRITE, > + /**< Rewrite node. */ > + RTE_NODE_IP6_LOOKUP_NEXT_PKT_DROP, > + /**< Packet drop node. */ > + RTE_NODE_IP6_LOOKUP_NEXT_MAX, > + /**< Number of next nodes of lookup node. */ > +}; > + > +/** > + * Add ipv6 route to lookup table. > + * > + * @param ip > + * IPv6 address of route to be added. > + * @param depth > + * Depth of the rule to be added. > + * @param next_hop > + * Next hop id of the rule result to be added. > + * @param next_node > + * Next node to redirect traffic to. > + * > + * @return > + * 0 on success, negative otherwise. 
> + */ > +__rte_experimental > +int rte_node_ip6_route_add(const uint8_t *ip, uint8_t depth, uint16_t > next_hop, > + enum rte_node_ip6_lookup_next next_node); > + > +/** > + * Add a next hop's rewrite data. > + * > + * @param next_hop > + * Next hop id to add rewrite data to. > + * @param rewrite_data > + * Rewrite data. > + * @param rewrite_len > + * Length of rewrite data. > + * @param dst_port > + * Destination port to redirect traffic to. > + * > + * @return > + * 0 on success, negative otherwise. > + */ > +__rte_experimental > +int rte_node_ip6_rewrite_add(uint16_t next_hop, uint8_t *rewrite_data, > + uint8_t rewrite_len, uint16_t dst_port); > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* __INCLUDE_RTE_NODE_IP6_API_H__ */ > diff --git a/lib/node/version.map b/lib/node/version.map > index a799b0d389..40df308bfe 100644 > --- a/lib/node/version.map > +++ b/lib/node/version.map > @@ -4,6 +4,8 @@ EXPERIMENTAL { > rte_node_eth_config; > rte_node_ip4_route_add; > rte_node_ip4_rewrite_add; > + rte_node_ip6_rewrite_add; > + rte_node_ip6_route_add; > rte_node_logtype; > local: *; > }; > -- > 2.25.1 >