This allows us to control the algorithm used to spread traffic between sockets, adding more fine grained control. If the user does not specify a fanout mode, the PMD driver will default to PACKET_FANOUT_HASH.
Signed-off-by: Tudor Cornea <tudor.cor...@gmail.com> --- v3: * Removed PACKET_FANOUT ifdef. The feature has existed since Linux 3.1 * Removed PACKET_FANOUT_FLAG_ROLLOVER ifdef * Simplified the get_fanout() function * Renamed mode variable to load_balance. I was also thinking of load_balance_mode as an alternative. * Removed space between get_fanout and following if statement * Improved documentation. Added link to manual page for PACKET_FANOUT v2: * Renamed the patch * Replaced packet_fanout argument with fanout_mode, which allows more fine grained control --- doc/guides/nics/af_packet.rst | 16 +++- drivers/net/af_packet/rte_eth_af_packet.c | 89 ++++++++++++++++++----- 2 files changed, 84 insertions(+), 21 deletions(-) diff --git a/doc/guides/nics/af_packet.rst b/doc/guides/nics/af_packet.rst index a343d3a961..2ec9e9e674 100644 --- a/doc/guides/nics/af_packet.rst +++ b/doc/guides/nics/af_packet.rst @@ -13,8 +13,6 @@ PACKET_MMAP, which provides a mmapped ring buffer, shared between user space and kernel, that's used to send and receive packets. This helps reducing system calls and the copies needed between user space and Kernel. -The PACKET_FANOUT_HASH behavior of AF_PACKET is used for frame reception. - Options and inherent limitations -------------------------------- @@ -25,11 +23,23 @@ Some of these, in turn, will be used to configure the PACKET_MMAP settings. * ``qpairs`` - number of Rx and Tx queues (optional, default 1); * ``qdisc_bypass`` - set PACKET_QDISC_BYPASS option in AF_PACKET (optional, disabled by default); +* ``fanout_mode`` - set fanout algorithm. + Possible choices: hash, lb, cpu, rollover, rnd, qm (optional, default hash); * ``blocksz`` - PACKET_MMAP block size (optional, default 4096); * ``framesz`` - PACKET_MMAP frame size (optional, default 2048B; Note: multiple of 16B); * ``framecnt`` - PACKET_MMAP frame count (optional, default 512). +For details regarding ``fanout_mode`` argument, you can consult the +`PACKET_FANOUT documentation <https://www.man7.org/linux/man-pages/man7/packet.7.html>`_. + +As an example, when ``fanout_mode=cpu`` is selected, the PACKET_FANOUT_CPU +mode will be set on the sockets, so that on frame reception, the socket +will be selected based on the CPU on which the packet arrived. + +Only one ``fanout_mode`` can be chosen. If left unspecified, the default is to +use the PACKET_FANOUT_HASH behavior of AF_PACKET for frame reception. + Because this implementation is based on PACKET_MMAP, and PACKET_MMAP has its own pre-requisites, it should be noted that the inner workings of PACKET_MMAP should be carefully considered before modifying some of these options (namely, @@ -64,7 +74,7 @@ framecnt=512): .. code-block:: console - --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0 + --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0,fanout_mode=hash Features and Limitations ------------------------ diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c index ceb8d9356a..9ca43fc54e 100644 --- a/drivers/net/af_packet/rte_eth_af_packet.c +++ b/drivers/net/af_packet/rte_eth_af_packet.c @@ -36,6 +36,7 @@ #define ETH_AF_PACKET_FRAMESIZE_ARG "framesz" #define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt" #define ETH_AF_PACKET_QDISC_BYPASS_ARG "qdisc_bypass" +#define ETH_AF_PACKET_FANOUT_MODE_ARG "fanout_mode" #define DFLT_FRAME_SIZE (1 << 11) #define DFLT_FRAME_COUNT (1 << 9) @@ -96,6 +97,7 @@ static const char *valid_arguments[] = { ETH_AF_PACKET_FRAMESIZE_ARG, ETH_AF_PACKET_FRAMECOUNT_ARG, ETH_AF_PACKET_QDISC_BYPASS_ARG, + ETH_AF_PACKET_FANOUT_MODE_ARG, NULL }; @@ -700,6 +702,53 @@ open_packet_iface(const char *key __rte_unused, return 0; } +#define PACKET_FANOUT_INVALID -1 + +static int +get_fanout_group_id(int if_index) +{ + return (getpid() ^ if_index) & 0xffff; +} + +static int +get_fanout_mode(const char *fanout_mode) +{ + int load_balance = PACKET_FANOUT_FLAG_DEFRAG | + PACKET_FANOUT_FLAG_ROLLOVER; + + if (!fanout_mode) { + /* Default */ + load_balance |= PACKET_FANOUT_HASH; + } else if (!strcmp(fanout_mode, "hash")) { + load_balance |= PACKET_FANOUT_HASH; + } else if (!strcmp(fanout_mode, "lb")) { + load_balance |= PACKET_FANOUT_LB; + } else if (!strcmp(fanout_mode, "cpu")) { + load_balance |= PACKET_FANOUT_CPU; + } else if (!strcmp(fanout_mode, "rollover")) { + load_balance |= PACKET_FANOUT_ROLLOVER; + } else if (!strcmp(fanout_mode, "rnd")) { + load_balance |= PACKET_FANOUT_RND; + } else if (!strcmp(fanout_mode, "qm")) { + load_balance |= PACKET_FANOUT_QM; + } else { + /* Invalid Fanout Mode */ + load_balance = PACKET_FANOUT_INVALID; + } + + return load_balance; +} + +static int +get_fanout(const char *fanout_mode, int if_index) +{ + int load_balance = get_fanout_mode(fanout_mode); + if (load_balance != PACKET_FANOUT_INVALID) + return get_fanout_group_id(if_index) | (load_balance << 16); + else + return PACKET_FANOUT_INVALID; +} + static int rte_pmd_init_internals(struct rte_vdev_device *dev, const int sockfd, @@ -709,6 +758,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev, unsigned int framesize, unsigned int framecnt, unsigned int qdisc_bypass, + const char *fanout_mode, struct pmd_internals **internals, struct rte_eth_dev **eth_dev, struct rte_kvargs *kvlist) @@ -727,9 +777,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev, int rc, tpver, discard; int qsockfd = -1; unsigned int i, q, rdsize; -#if defined(PACKET_FANOUT) int fanout_arg; -#endif for (k_idx = 0; k_idx < kvlist->count; k_idx++) { pair = &kvlist->pairs[k_idx]; @@ -809,13 +857,11 @@ rte_pmd_init_internals(struct rte_vdev_device *dev, sockaddr.sll_protocol = htons(ETH_P_ALL); sockaddr.sll_ifindex = (*internals)->if_index; -#if defined(PACKET_FANOUT) - fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff; - fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16; -#if defined(PACKET_FANOUT_FLAG_ROLLOVER) - fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16; -#endif -#endif + fanout_arg = get_fanout(fanout_mode, (*internals)->if_index); + if (fanout_arg == PACKET_FANOUT_INVALID) { + PMD_LOG(ERR, "Invalid fanout mode: %s", fanout_mode); + goto error; + } for (q = 0; q < nb_queues; q++) { /* Open an AF_PACKET socket for this queue... */ @@ -926,16 +972,17 @@ rte_pmd_init_internals(struct rte_vdev_device *dev, goto error; } -#if defined(PACKET_FANOUT) - rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT, - &fanout_arg, sizeof(fanout_arg)); - if (rc == -1) { - PMD_LOG_ERRNO(ERR, - "%s: could not set PACKET_FANOUT on AF_PACKET socket for %s", - name, pair->value); - goto error; + if (nb_queues > 1) { + rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT, + &fanout_arg, sizeof(fanout_arg)); + if (rc == -1) { + PMD_LOG_ERRNO(ERR, + "%s: could not set PACKET_FANOUT " + "on AF_PACKET socket for %s", + name, pair->value); + goto error; + } } -#endif } /* reserve an ethdev entry */ @@ -1003,6 +1050,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev, unsigned int framecount = DFLT_FRAME_COUNT; unsigned int qpairs = 1; unsigned int qdisc_bypass = 1; + const char *fanout_mode = NULL; /* do some parameter checking */ if (*sockfd < 0) @@ -1065,6 +1113,10 @@ rte_eth_from_packet(struct rte_vdev_device *dev, } continue; } + if (strstr(pair->key, ETH_AF_PACKET_FANOUT_MODE_ARG) != NULL) { + fanout_mode = pair->value; + continue; + } } if (framesize > blocksize) { @@ -1091,6 +1143,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev, blocksize, blockcount, framesize, framecount, qdisc_bypass, + fanout_mode, &internals, ð_dev, kvlist) < 0) return -1; -- 2.34.1