Hi,

The patch looks good globally, but I have a few comments/requests, inline.

Best regards,

Pascal

On 14/09/2017 17:07, Ophir Munk wrote:
> This commit fixes two bugs related to tap devices. The first bug occurs
> when executing in testpmd the following flow rule assuming tap device has
> 4 rx and tx pair queues
> "flow create 0 ingress pattern eth / end actions queue index 5 / end"
> This command will report on success and will print ""Flow rule #0 created"
> although it should have failed as queue index number 5 does not exist
>
> The second bug occurs when executing in testpmd "port start all" following
> a port configuration. Assuming 1 pair of rx and tx queues an error is
> reported: "Fail to start port 0"
>
> Before this commit a fixed max number (16) of rx and tx queue pairs were
> created on startup where the file descriptors (fds) of rx and tx pairs were
> identical.  As a result in the first bug queue index 5 existed because the
> tap device was created with 16 rx and tx queue pairs regardless of the
> configured number of queues. In the second bug when tap device was started
> tx fd was closed before opening it and executing ioctl() on it. However
> closing the sole fd of the device caused ioctl to fail with "No such
> device".
>
> This commit creates the configured number of rx and tx queue pairs (up to
> max 16) and assigns a unique fd to each queue. It was written to solve the
> first bug and was found as the right fix for the second bug as well.
>
> Fixes: 02f96a0a82d1 ("net/tap: add TUN/TAP device PMD")
> Fixes: bf7b7f437b49 ("net/tap: create netdevice during probing")
> Fixes: de96fe68ae95 ("net/tap: add basic flow API patterns and actions")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Ophir Munk <ophi...@mellanox.com>
> ---
> Comment 1:
> Reason for V2: fixing coding style. Previous versions of this patch are 
> supersseded
>
> Comment 2:
> Pascal - I would appreciate your feedback on the following discussion:
>
>>> /* Immediately create the netdevice (this will create the 1st 
>>> queue). */
>>> - if (tap_setup_queue(dev, pmd, 0) == -1)
>>> + // rx queue
>>> + if (tap_setup_queue(dev, pmd, 0, 1) == -1) goto error_exit; // tx 
>>> + queue if (tap_setup_queue(dev, pmd, 0, 0) == -1)
>>> goto error_exit;
>>>
>> Adrien Mazarguil wrote:
>> Can this be removed entirely to instead wait until the application 
>> actually requests the creation of the first queue?
> Ophir Munk wrote:
> Removing this piece of code is causing failure during tap device 
> initialization and testpmd as well.
> The subsequent code following this removal contains hundreds of lines so I 
> prefer getting Pasca advise why the immediate creation is required here and 
> if possible - how to avoid it.
I think it was required to have the netdevice present as soon as we left
the create function, as some other function occurred just a little later
(prior to queue_setup) that needed ioctl() on the netdevice.
I would leave it like this for this patch, but it would be more elegant
to create the netdevice only when really needed.

>
>  drivers/net/tap/rte_eth_tap.c | 130 
> ++++++++++++++++++++++++++++++++----------
>  drivers/net/tap/rte_eth_tap.h |   1 +
>  drivers/net/tap/tap_flow.c    |   3 +-
>  3 files changed, 104 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 9acea83..f8cf330 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -603,8 +603,31 @@ enum ioctl_mode {
>  }
>  
>  static int
> -tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
> +tap_dev_configure(struct rte_eth_dev *dev)
>  {
> +     if (dev->data->nb_rx_queues > RTE_PMD_TAP_MAX_QUEUES) {
> +             RTE_LOG(ERR, PMD,
> +                     "%s: number of rx queues %d exceeds max num of queues 
> %d\n",
> +                     dev->device->name,
> +                     dev->data->nb_rx_queues,
> +                     RTE_PMD_TAP_MAX_QUEUES);
> +             return -1;
> +     }
> +     if (dev->data->nb_tx_queues > RTE_PMD_TAP_MAX_QUEUES) {
> +             RTE_LOG(ERR, PMD,
> +                     "%s: number of tx queues %d exceeds max num of queues 
> %d\n",
> +                     dev->device->name,
> +                     dev->data->nb_tx_queues,
> +                     RTE_PMD_TAP_MAX_QUEUES);
> +             return -1;
> +     }
> +
> +     RTE_LOG(INFO, PMD, "%s: %p: TX queues number update: 0 -> %u\n",
> +          dev->device->name, (void *)dev, dev->data->nb_tx_queues);
> +
> +     RTE_LOG(INFO, PMD, "%s: %p: RX queues number update: 0 -> %u\n",
> +          dev->device->name, (void *)dev, dev->data->nb_rx_queues);
> +
>       return 0;
>  }
tap_dev_configure() can be called when restarting a port, I'm not so
sure the number update changes number of queues from "0".
Why not a simple "TX/RX configured queues number: %u"?
> @@ -673,9 +696,9 @@ enum ioctl_mode {
>       unsigned long rx_nombuf = 0, ierrors = 0;
>       const struct pmd_internals *pmd = dev->data->dev_private;
>  
> -     imax = (pmd->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
> -             pmd->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
> -
> +     /* rx queue statistics */
> +     imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
> +             dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
>       for (i = 0; i < imax; i++) {
>               tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
>               tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
> @@ -683,7 +706,13 @@ enum ioctl_mode {
>               rx_bytes_total += tap_stats->q_ibytes[i];
>               rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
>               ierrors += pmd->rxq[i].stats.ierrors;
> +     }
> +
> +     /* tx queue statistics */
> +     imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
> +             dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
>  
> +     for (i = 0; i < imax; i++) {
>               tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
>               tap_stats->q_errors[i] = pmd->txq[i].stats.errs;
>               tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
> @@ -730,10 +759,14 @@ enum ioctl_mode {
>       tap_flow_implicit_flush(internals, NULL);
>  
>       for (i = 0; i < internals->nb_queues; i++) {
> -             if (internals->rxq[i].fd != -1)
> +             if (internals->rxq[i].fd != -1) {
>                       close(internals->rxq[i].fd);
> -             internals->rxq[i].fd = -1;
> -             internals->txq[i].fd = -1;
> +                     internals->rxq[i].fd = -1;
> +             }
> +             if (internals->txq[i].fd != -1) {
> +                     close(internals->txq[i].fd);
> +                     internals->txq[i].fd = -1;
> +             }
>       }
>  
>       if (internals->remote_if_index) {
> @@ -887,30 +920,57 @@ enum ioctl_mode {
>  static int
>  tap_setup_queue(struct rte_eth_dev *dev,
>               struct pmd_internals *internals,
> -             uint16_t qid)
> +             uint16_t qid,
> +             int is_rx)
>  {
> +     int *fd;
> +     int *other_fd;
> +     const char *dir;
>       struct pmd_internals *pmd = dev->data->dev_private;
>       struct rx_queue *rx = &internals->rxq[qid];
>       struct tx_queue *tx = &internals->txq[qid];
> -     int fd = rx->fd == -1 ? tx->fd : rx->fd;
>  
> -     if (fd == -1) {
> -             RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
> -                     pmd->name, qid);
> -             fd = tun_alloc(pmd);
> -             if (fd < 0) {
> +     if (is_rx) {
> +             fd = &rx->fd;
> +             other_fd = &tx->fd;
> +             dir = "rx";
> +     } else {
> +             fd = &tx->fd;
> +             other_fd = &rx->fd;
> +             dir = "tx";
> +     }
> +     if (*fd != -1) {
> +             /* fd for this queue already exists */
> +             RTE_LOG(DEBUG, PMD, "%s: fd %d for %s queue qid %d exists\n",
> +                     pmd->name, *fd, dir, qid);
> +     } else if (*other_fd != -1) {
> +             /* Only other_fd exists. dup it */
> +             *fd = dup(*other_fd);
> +             if (*fd < 0) {
> +                     *fd = -1;
> +                     RTE_LOG(ERR, PMD, "%s: dup() failed.\n",
> +                             pmd->name);
> +                     return -1;
> +             }
> +             RTE_LOG(DEBUG, PMD, "%s: dup fd %d for %s queue qid %d (%d)\n",
> +                     pmd->name, *other_fd, dir, qid, *fd);
> +     } else {
> +             /* Both RX and TX fds do not exist (equal -1). Create fd */
> +             *fd = tun_alloc(pmd);
> +             if (*fd < 0) {
> +                     *fd = -1; /* restore original value */
>                       RTE_LOG(ERR, PMD, "%s: tun_alloc() failed.\n",
>                               pmd->name);
>                       return -1;
>               }
> +             RTE_LOG(DEBUG, PMD, "%s: add %s queue for qid %d fd %d\n",
> +                     pmd->name, dir, qid, *fd);
>       }
>  
> -     rx->fd = fd;
> -     tx->fd = fd;
>       tx->mtu = &dev->data->mtu;
>       rx->rxmode = &dev->data->dev_conf.rxmode;
>  
> -     return fd;
> +     return *fd;
>  }
>  
>  static int
> @@ -932,10 +992,10 @@ enum ioctl_mode {
>       int fd;
>       int i;
>  
> -     if ((rx_queue_id >= internals->nb_queues) || !mp) {
> +     if (rx_queue_id >= dev->data->nb_rx_queues || !mp) {
>               RTE_LOG(WARNING, PMD,
> -                     "nb_queues %d too small or mempool NULL\n",
> -                     internals->nb_queues);
> +                     "nb_rx_queues %d too small or mempool NULL\n",
> +                     dev->data->nb_rx_queues);
>               return -1;
>       }
>  
> @@ -954,7 +1014,7 @@ enum ioctl_mode {
>       rxq->iovecs = iovecs;
>  
>       dev->data->rx_queues[rx_queue_id] = rxq;
> -     fd = tap_setup_queue(dev, internals, rx_queue_id);
> +     fd = tap_setup_queue(dev, internals, rx_queue_id, 1);
>       if (fd == -1) {
>               ret = fd;
>               goto error;
> @@ -1002,11 +1062,11 @@ enum ioctl_mode {
>       struct pmd_internals *internals = dev->data->dev_private;
>       int ret;
>  
> -     if (tx_queue_id >= internals->nb_queues)
> +     if (tx_queue_id >= dev->data->nb_tx_queues)
>               return -1;
>  
>       dev->data->tx_queues[tx_queue_id] = &internals->txq[tx_queue_id];
> -     ret = tap_setup_queue(dev, internals, tx_queue_id);
> +     ret = tap_setup_queue(dev, internals, tx_queue_id, 0);
>       if (ret == -1)
>               return -1;
>  
> @@ -1166,7 +1226,6 @@ enum ioctl_mode {
>       .filter_ctrl            = tap_dev_filter_ctrl,
>  };
>  
> -
>  static int
>  eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name,
>                  char *remote_iface, int fixed_mac_type)
> @@ -1193,6 +1252,7 @@ enum ioctl_mode {
>       }
>  
>       pmd = dev->data->dev_private;
> +     pmd->dev = dev;
You use pmd->dev only to access dev->nb_rx_queues.
I would suggest instead to add *nb_rx_queues in struct pmd_internals,
and remove *dev from it.
You can even remove nb_queues from pmd_internals, and only use
nb_rx_queues and RTE_PMD_TAP_MAX_QUEUES where relevant.

>       snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
>       pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
>  
> @@ -1212,8 +1272,9 @@ enum ioctl_mode {
>  
>       data->dev_link = pmd_link;
>       data->mac_addrs = &pmd->eth_addr;
> -     data->nb_rx_queues = pmd->nb_queues;
> -     data->nb_tx_queues = pmd->nb_queues;
> +     /* Set the number of RX and TX queues */
> +     data->nb_rx_queues = 0;
> +     data->nb_tx_queues = 0;
>  
>       dev->data = data;
>       dev->dev_ops = &ops;
> @@ -1241,7 +1302,11 @@ enum ioctl_mode {
>       }
>  
>       /* Immediately create the netdevice (this will create the 1st queue). */
> -     if (tap_setup_queue(dev, pmd, 0) == -1)
> +     /* rx queue */
> +     if (tap_setup_queue(dev, pmd, 0, 1) == -1)
> +             goto error_exit;
> +     /* tx queue */
> +     if (tap_setup_queue(dev, pmd, 0, 0) == -1)
>               goto error_exit;
>  
>       ifr.ifr_mtu = dev->data->mtu;
> @@ -1515,9 +1580,16 @@ enum ioctl_mode {
>               tap_flow_implicit_flush(internals, NULL);
>               nl_final(internals->nlsk_fd);
>       }
> -     for (i = 0; i < internals->nb_queues; i++)
> -             if (internals->rxq[i].fd != -1)
> +     for (i = 0; i < internals->nb_queues; i++) {
> +             if (internals->rxq[i].fd != -1) {
>                       close(internals->rxq[i].fd);
> +                     internals->rxq[i].fd = -1;
> +             }
> +             if (internals->txq[i].fd != -1) {
> +                     close(internals->txq[i].fd);
> +                     internals->txq[i].fd = -1;
> +             }
> +     }
>  
>       close(internals->ioctl_sock);
>       rte_free(eth_dev->data->dev_private);
> diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
> index 928a045..be57cc7 100644
> --- a/drivers/net/tap/rte_eth_tap.h
> +++ b/drivers/net/tap/rte_eth_tap.h
> @@ -80,6 +80,7 @@ struct tx_queue {
>  };
>  
>  struct pmd_internals {
> +     struct rte_eth_dev *dev;          /* Ethernet device. */
>       char remote_iface[RTE_ETH_NAME_MAX_LEN]; /* Remote netdevice name */
>       char name[RTE_ETH_NAME_MAX_LEN];  /* Internal Tap device name */
>       uint16_t nb_queues;               /* Number of queues supported */
See comment above.
> diff --git a/drivers/net/tap/tap_flow.c b/drivers/net/tap/tap_flow.c
> index 41f7345..eefa868 100644
> --- a/drivers/net/tap/tap_flow.c
> +++ b/drivers/net/tap/tap_flow.c
> @@ -1092,7 +1092,8 @@ struct tap_flow_items {
>                       if (action)
>                               goto exit_action_not_supported;
>                       action = 1;
> -                     if (!queue || (queue->index >= pmd->nb_queues))
> +                     if (!queue ||
> +                     (queue->index > pmd->dev->data->nb_rx_queues - 1))
>                               goto exit_action_not_supported;
>                       if (flow)
>                               err = add_action_skbedit(flow, queue->index);

Reply via email to