> On Sep 27, 2018, at 6:19 AM, Raslan Darawsheh <rasl...@mellanox.com> wrote:
>
> In the case the device is created by the primary process,
> the secondary must request some file descriptors to attach the queues.
> The file descriptors are shared via IPC Unix socket.
>
> Thanks to the IPC synchronization, the secondary process
> is now able to do Rx/Tx on a TAP created by the primary process.
>
> Signed-off-by: Raslan Darawsheh <rasl...@mellanox.com>
> Signed-off-by: Thomas Monjalon <tho...@monjalon.net>
>
> ---
> v2:
> - translate file descriptors via IPC API
> - add documentation
> v3:
> - rabse the commit
> - use private static array for fd's to be local for each process
>
> ---
> ---
> doc/guides/nics/tap.rst | 16 ++++
> doc/guides/rel_notes/release_18_11.rst | 4 +
> drivers/net/tap/Makefile | 1 +
> drivers/net/tap/rte_eth_tap.c | 133 +++++++++++++++++++++++++++++++++
> 4 files changed, 154 insertions(+)
>
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> index 2714868..d1f3e1c 100644
> --- a/doc/guides/nics/tap.rst
> +++ b/doc/guides/nics/tap.rst
> @@ -152,6 +152,22 @@ Distribute IPv4 TCP packets using RSS to a given MAC
> address over queues 0-3::
> testpmd> flow create 0 priority 4 ingress pattern eth dst is
> 0a:0b:0c:0d:0e:0f \
> / ipv4 / tcp / end actions rss queues 0 1 2 3 end / end
>
> +Multi-process sharing
> +---------------------
> +
> +It is possible to attach an existing TAP device in a secondary process,
> +by declaring it as a vdev with the same name as in the primary process,
> +and without any parameter.
> +
> +The port attached in a secondary process will give access to the
> +statistics and the queues.
> +Therefore it can be used for monitoring or Rx/Tx processing.
> +
> +The IPC synchronization of Rx/Tx queues is currently limited:
> +
> + - Only 8 queues
> + - Synchronized on probing, but not on later port update
> +
> Example
> -------
>
> diff --git a/doc/guides/rel_notes/release_18_11.rst
> b/doc/guides/rel_notes/release_18_11.rst
> index 8c4bb54..a9dda5b 100644
> --- a/doc/guides/rel_notes/release_18_11.rst
> +++ b/doc/guides/rel_notes/release_18_11.rst
> @@ -67,6 +67,10 @@ New Features
> SR-IOV option in Hyper-V and Azure. This is an alternative to the previous
> vdev_netvsc, tap, and failsafe drivers combination.
>
> +* **Added TAP Rx/Tx queues sharing with a secondary process.**
> +
> + A secondary process can attach a TAP device created in the primary process,
> + probe the queues, and process Rx/Tx in a secondary process.
>
> API Changes
> -----------
> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
> index 3243365..7748283 100644
> --- a/drivers/net/tap/Makefile
> +++ b/drivers/net/tap/Makefile
> @@ -22,6 +22,7 @@ CFLAGS += -O3
> CFLAGS += -I$(SRCDIR)
> CFLAGS += -I.
> CFLAGS += $(WERROR_FLAGS)
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
> LDLIBS += -lrte_bus_vdev -lrte_gso
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 8cc4552..8d276eb 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -16,6 +16,8 @@
> #include <rte_debug.h>
> #include <rte_ip.h>
> #include <rte_string_fns.h>
> +#include <rte_ethdev.h>
> +#include <rte_errno.h>
>
> #include <assert.h>
> #include <sys/types.h>
> @@ -62,6 +64,9 @@
> #define TAP_GSO_MBUFS_NUM \
> (TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE)
>
> +/* IPC key for queue fds sync */
> +#define TAP_MP_KEY "tap_mp_sync_queues"
> +
> static struct rte_vdev_driver pmd_tap_drv;
> static struct rte_vdev_driver pmd_tun_drv;
> static struct pmd_process_private *process_private;
> @@ -101,6 +106,17 @@ enum ioctl_mode {
> REMOTE_ONLY,
> };
>
> +/* Message header to synchronize queues via IPC */
> +struct ipc_queues {
> + char port_name[RTE_DEV_NAME_MAX_LEN];
> + int rxq_count;
> + int txq_count;
> + /*
> + * The file descriptors are in the dedicated part
> + * of the Unix message to be translated by the kernel.
> + */
> +};
> +
> static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
>
> /**
> @@ -1980,6 +1996,100 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
> return ret;
> }
>
> +/* Request queue file descriptors from secondary to primary. */
> +static int
> +tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
> +{
> + int ret;
> + struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
> + struct rte_mp_msg request, *reply;
> + struct rte_mp_reply replies;
> + struct ipc_queues *request_param = (struct ipc_queues *)request.param;
> + struct ipc_queues *reply_param;
> + int queue, fd_iterator;
> +
> + /* Prepare the request */
> + strcpy(request.name, TAP_MP_KEY);
> + strcpy(request_param->port_name, port_name);
Should we not be using the strlcpy() functions here.
> + request.len_param = sizeof(*request_param);
> + /* Send request and receive reply */
> + ret = rte_mp_request_sync(&request, &replies, &timeout);
> + if (ret < 0) {
> + TAP_LOG(ERR, "Failed to request queues from primary: %d",
> + rte_errno);
> + return -1;
> + }
> + /* FIXME: handle replies.nb_received > 1 */
> + reply = &replies.msgs[0];
> + reply_param = (struct ipc_queues *)reply->param;
> + TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
> +
> + /* Attach the queues from received file descriptors */
> +
> + dev->data->nb_rx_queues = reply_param->rxq_count;
> + dev->data->nb_tx_queues = reply_param->txq_count;
Do we really need a rxq_count and txq_count as they are always the same it
seems? Just a comment not a request to change it.
> + fd_iterator = 0;
> + for (queue = 0; queue < reply_param->rxq_count; queue++)
> + process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
> + for (queue = 0; queue < reply_param->txq_count; queue++)
> + process_private->txq_fds[queue] = reply->fds[fd_iterator++];
> +
> +
Extra blank line here, needs to be removed.
> + return 0;
> +}
> +
> +/* Send the queue file descriptors from the primary process to secondary. */
> +static int
> +tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> +{
> + struct rte_eth_dev *dev;
> + struct rte_mp_msg reply;
> + const struct ipc_queues *request_param =
> + (const struct ipc_queues *)request->param;
> + struct ipc_queues *reply_param =
> + (struct ipc_queues *)reply.param;
> + uint16_t port_id;
> + int queue;
> + int ret;
> +
> + /* Get requested port */
> + TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
> + ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> + if (ret) {
> + TAP_LOG(ERR, "Failed to get port id for %s",
> + request_param->port_name);
> + return -1;
> + }
> + dev = &rte_eth_devices[port_id];
> +
> + /* Fill file descriptors for all queues */
> + reply.num_fds = 0;
> + reply_param->rxq_count = 0;
> + for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
> + reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
> + reply_param->rxq_count++;
> + }
> + assert(reply_param->rxq_count == dev->data->nb_rx_queues);
Pick an assert method assert() or RTE_ASSERT() why have both? I would suggest
using RTE_ASSERT everywhere.
> + reply_param->txq_count = 0;
> + for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
> + reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
> + reply_param->txq_count++;
> + }
> + assert(reply_param->txq_count == dev->data->nb_tx_queues);
> + /* FIXME: split message if more queues than RTE_MP_MAX_FD_NUM */
> + RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
I do not like FIXME or TODO statements in the code. For FIXME we need to fix it
or remove the comment. For TODO we need to put it in a enhancement list and do
it later.
> +
> + /* Send reply */
> + strcpy(reply.name, request->name);
> + strcpy(reply_param->port_name, request_param->port_name);
strlcpy() here as well.
> + reply.len_param = sizeof(*reply_param);
> + if (rte_mp_reply(&reply, peer) < 0) {
> + TAP_LOG(ERR, "Failed to reply an IPC request to sync queues");
> + return -1;
> + }
> + return 0;
> +}
> +
> /* Open a TAP interface device.
> */
> static int
> @@ -2009,6 +2119,21 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> /* TODO: request info from primary to set up Rx and Tx */
> eth_dev->dev_ops = &ops;
> eth_dev->device = &dev->device;
> + eth_dev->rx_pkt_burst = pmd_rx_burst;
> + eth_dev->tx_pkt_burst = pmd_tx_burst;
> + if (!rte_eal_primary_proc_alive(NULL)) {
> + TAP_LOG(ERR, "Primary process is missing");
> + return -1;
> + }
> + process_private = (struct pmd_process_private *)
> + rte_zmalloc_socket(name,
> + sizeof(struct pmd_process_private),
> + RTE_CACHE_LINE_SIZE,
> + eth_dev->device->numa_node);
> +
> + ret = tap_mp_attach_queues(name, eth_dev);
> + if (ret != 0)
> + return -1;
> rte_eth_dev_probing_finish(eth_dev);
> return 0;
> }
> @@ -2056,6 +2181,13 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s",
> name, tap_name);
>
> + /* Register IPC feed callback */
> + ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
> + if (ret < 0 && rte_errno != EEXIST) {
> + TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
> + tuntap_name, strerror(rte_errno));
> + goto leave;
> + }
> ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
> ETH_TUNTAP_TYPE_TAP);
>
> @@ -2063,6 +2195,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> if (ret == -1) {
> TAP_LOG(ERR, "Failed to create pmd for %s as %s",
> name, tap_name);
> + rte_mp_action_unregister(TAP_MP_KEY);
> tap_unit--; /* Restore the unit number */
> }
> rte_kvargs_free(kvlist);
> --
> 2.7.4
>
Regards,
Keith