> -----Original Message-----
> From: David Marchand <david.march...@redhat.com>
> Sent: Thursday, March 9, 2023 8:38 PM
> To: dev@dpdk.org
> Cc: sta...@dpdk.org; Maxime Coquelin <maxime.coque...@redhat.com>; Xia,
> Chenbo <chenbo....@intel.com>; Jianfeng Tan <jianfeng....@intel.com>;
> Junjie Chen <junjie.j.c...@intel.com>; Hyong Youb Kim <hyon...@cisco.com>;
> Harman Kalra <hka...@marvell.com>
> Subject: [PATCH 3/3] net/vhost: fix Rx interrupt
> 
> In the situation when a port was started while no virtio driver was
> connected, Rx interrupts were broken.
> They were also broken after a virtio driver reconnects.
> 
> There were several issues mixed in:
> - this driver was not exposing a fixed file descriptor per Rx queue,
>   If a virtio driver was not connected yet, each Rx queue vector was
>   pointing at a -1 fd, and an application could interpret this as a lack
>   of Rx interrupt support,
> - when a virtio driver later (re)connected, this net/vhost driver was
>   hacking into the EAL layer epoll fd to remove a old vring kickfd and
>   insert the new vring kickfd. This hack constitutes a layer violation
>   plus users of rte_eth_dev_rx_intr_ctl_q_get_fd() were not notified of
>   this change,
> - in the case of reconnection, because the interrupt handle was
>   reallocated, a 0 fd was failing to be removed from the EAL layer
>   epoll fd, which resulted in never fixing the EAL epoll fd,
> 
> To fix Rx interrupts:
> - allocating (eth_vhost_install_intr) / releasing
>   (eth_vhost_uninstall_intr) the interrupt handle is moved when
>   starting / closing the port, while setting / resetting per rxq fd is
>   triggered by vhost events via some new helpers (see
>   eth_vhost_configure_intr and eth_vhost_unconfigure_intr),
> - a "proxy" epoll fd is created per Rx queue at the time the interrupt
>   handle is allocated, so applications can start waiting for events on
>   those fds, even before a virtio driver initialises,
> - when available, vring kickd are populated in the "proxy" epoll fd,
> 
> Fixes: 3f8ff12821e4 ("vhost: support interrupt mode")
> Fixes: 3d4cd4be577c ("net/vhost: fix interrupt mode")
> Fixes: d61138d4f0e2 ("drivers: remove direct access to interrupt handle")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: David Marchand <david.march...@redhat.com>
> ---
>  drivers/net/vhost/rte_eth_vhost.c | 318 ++++++++++++------------------
>  1 file changed, 127 insertions(+), 191 deletions(-)
> 
> diff --git a/drivers/net/vhost/rte_eth_vhost.c
> b/drivers/net/vhost/rte_eth_vhost.c
> index 96deb18d91..62ef955ebc 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -78,8 +78,9 @@ struct vhost_queue {
>       uint16_t port;
>       uint16_t virtqueue_id;
>       struct vhost_stats stats;
> -     int intr_enable;
>       rte_spinlock_t intr_lock;
> +     struct epoll_event ev;
> +     int kickfd;
>  };
> 
>  struct pmd_internal {
> @@ -545,115 +546,68 @@ find_internal_resource(char *ifname)
>       return list;
>  }
> 
> -static int
> +static void
>  eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
>  {
> -     struct rte_intr_handle *handle = eth_dev->intr_handle;
> -     struct rte_epoll_event rev, *elist;
> -     int epfd, ret;
> -
> -     if (handle == NULL)
> -             return 0;
> -
> -     elist = rte_intr_elist_index_get(handle, rxq_idx);
> -     if (rte_intr_efds_index_get(handle, rxq_idx) == elist->fd)
> -             return 0;
> -
> -     VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
> -                     rxq_idx);
> +     struct rte_vhost_vring vring;
> +     struct vhost_queue *vq;
> 
> -     if (elist->fd != -1)
> -             VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d,
> expected -1).\n",
> -                     elist->fd);
> +     vq = eth_dev->data->rx_queues[rxq_idx];
> +     if (vq == NULL || vq->vid < 0)
> +             return;
> 
> -     /*
> -      * First remove invalid epoll event, and then install
> -      * the new one. May be solved with a proper API in the
> -      * future.
> -      */
> -     epfd = elist->epfd;
> -     rev = *elist;
> -     ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
> -                     elist);
> -     if (ret) {
> -             VHOST_LOG(ERR, "Delete epoll event failed.\n");
> -             return ret;
> +     if (rte_vhost_get_vhost_vring(vq->vid, (rxq_idx << 1) + 1, &vring) <
> 0) {
> +             VHOST_LOG(DEBUG, "Failed to get rxq-%d's vring, skip!\n",
> rxq_idx);
> +             return;
>       }
> 
> -     rev.fd = rte_intr_efds_index_get(handle, rxq_idx);
> -     if (rte_intr_elist_index_set(handle, rxq_idx, rev))
> -             return -rte_errno;
> +     rte_spinlock_lock(&vq->intr_lock);
> 
> -     elist = rte_intr_elist_index_get(handle, rxq_idx);
> -     ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd, elist);
> -     if (ret) {
> -             VHOST_LOG(ERR, "Add epoll event failed.\n");
> -             return ret;
> +     /* Remove previous kickfd from proxy epoll */
> +     if (vq->kickfd >= 0 && vq->kickfd != vring.kickfd) {
> +             if (epoll_ctl(vq->ev.data.fd, EPOLL_CTL_DEL, vq->kickfd, &vq-
> >ev) < 0) {
> +                     VHOST_LOG(DEBUG, "Failed to unregister %d from rxq-%d
> epoll: %s\n",
> +                             vq->kickfd, rxq_idx, strerror(errno));
> +             } else {
> +                     VHOST_LOG(DEBUG, "Unregistered %d from rxq-%d epoll\n",
> +                             vq->kickfd, rxq_idx);
> +             }
> +             vq->kickfd = -1;
> +     }
> +
> +     /* Add new one, if valid */
> +     if (vq->kickfd != vring.kickfd && vring.kickfd >= 0) {
> +             if (epoll_ctl(vq->ev.data.fd, EPOLL_CTL_ADD, vring.kickfd,
> &vq->ev) < 0) {
> +                     VHOST_LOG(ERR, "Failed to register %d in rxq-%d
> epoll: %s\n",
> +                             vring.kickfd, rxq_idx, strerror(errno));
> +             } else {
> +                     vq->kickfd = vring.kickfd;
> +                     VHOST_LOG(DEBUG, "Registered %d in rxq-%d epoll\n",
> +                             vq->kickfd, rxq_idx);
> +             }
>       }
> 
> -     return 0;
> +     rte_spinlock_unlock(&vq->intr_lock);
>  }
> 
>  static int
>  eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
>  {
> -     struct rte_vhost_vring vring;
> -     struct vhost_queue *vq;
> -     int old_intr_enable, ret = 0;
> -
> -     vq = dev->data->rx_queues[qid];
> -     if (!vq) {
> -             VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
> -             return -1;
> -     }
> -
> -     rte_spinlock_lock(&vq->intr_lock);
> -     old_intr_enable = vq->intr_enable;
> -     vq->intr_enable = 1;
> -     ret = eth_vhost_update_intr(dev, qid);
> -     rte_spinlock_unlock(&vq->intr_lock);
> -
> -     if (ret < 0) {
> -             VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid);
> -             vq->intr_enable = old_intr_enable;
> -             return ret;
> -     }
> +     struct vhost_queue *vq = dev->data->rx_queues[qid];
> 
> -     ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
> -     if (ret < 0) {
> -             VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
> -             return ret;
> -     }
> -     VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
> -     rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
> -     rte_wmb();
> +     if (vq->vid >= 0)
> +             rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1,
> 1);
> 
> -     return ret;
> +     return 0;
>  }
> 
>  static int
>  eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
>  {
> -     struct rte_vhost_vring vring;
> -     struct vhost_queue *vq;
> -     int ret = 0;
> -
> -     vq = dev->data->rx_queues[qid];
> -     if (!vq) {
> -             VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
> -             return -1;
> -     }
> +     struct vhost_queue *vq = dev->data->rx_queues[qid];
> 
> -     ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
> -     if (ret < 0) {
> -             VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
> -             return ret;
> -     }
> -     VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
> -     rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
> -     rte_wmb();
> -
> -     vq->intr_enable = 0;
> +     if (vq->vid >= 0)
> +             rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1,
> 0);
> 
>       return 0;
>  }
> @@ -664,6 +618,14 @@ eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
>       struct rte_intr_handle *intr_handle = dev->intr_handle;
> 
>       if (intr_handle != NULL) {
> +             int i;
> +
> +             for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +                     int epoll_fd = rte_intr_efds_index_get(dev->intr_handle,
> i);
> +
> +                     if (epoll_fd >= 0)
> +                             close(epoll_fd);
> +             }
>               rte_intr_vec_list_free(intr_handle);
>               rte_intr_instance_free(intr_handle);
>       }
> @@ -673,15 +635,11 @@ eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
>  static int
>  eth_vhost_install_intr(struct rte_eth_dev *dev)
>  {
> -     struct rte_vhost_vring vring;
> -     struct vhost_queue *vq;
>       int nb_rxq = dev->data->nb_rx_queues;
> -     int i;
> -     int ret;
> +     struct vhost_queue *vq;
> 
> -     /* uninstall firstly if we are reconnecting */
> -     if (dev->intr_handle != NULL)
> -             eth_vhost_uninstall_intr(dev);
> +     int ret;
> +     int i;
> 
>       dev->intr_handle =
> rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
>       if (dev->intr_handle == NULL) {
> @@ -689,7 +647,7 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>               ret = -ENOMEM;
>               goto error;
>       }
> -     if (rte_intr_efd_counter_size_set(dev->intr_handle,
> sizeof(uint64_t))) {
> +     if (rte_intr_efd_counter_size_set(dev->intr_handle, 0)) {
>               ret = -rte_errno;
>               goto error;
>       }
> @@ -700,40 +658,28 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>               goto error;
>       }
> 
> -
> -     VHOST_LOG(INFO, "Prepare intr vec\n");
> +     VHOST_LOG(DEBUG, "Prepare intr vec\n");
>       for (i = 0; i < nb_rxq; i++) {
> -             if (rte_intr_vec_list_index_set(dev->intr_handle, i,
> -                             RTE_INTR_VEC_RXTX_OFFSET + i)) {
> -                     ret = -rte_errno;
> +             int epoll_fd = epoll_create1(0);
> +
> +             if (epoll_fd < 0) {
> +                     VHOST_LOG(ERR, "Failed to create proxy epoll fd for rxq-
> %d\n", i);
> +                     ret = -errno;
>                       goto error;
>               }
> -             if (rte_intr_efds_index_set(dev->intr_handle, i, -1)) {
> +
> +             if (rte_intr_vec_list_index_set(dev->intr_handle, i,
> +                             RTE_INTR_VEC_RXTX_OFFSET + i) ||
> +                             rte_intr_efds_index_set(dev->intr_handle, i,
> epoll_fd)) {
>                       ret = -rte_errno;
> +                     close(epoll_fd);
>                       goto error;
>               }
> -             vq = dev->data->rx_queues[i];
> -             if (!vq) {
> -                     VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
> -                     continue;
> -             }
> -
> -             ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
> -             if (ret < 0) {
> -                     VHOST_LOG(INFO,
> -                             "Failed to get rxq-%d's vring, skip!\n", i);
> -                     continue;
> -             }
> -
> -             if (vring.kickfd < 0) {
> -                     VHOST_LOG(INFO,
> -                             "rxq-%d's kickfd is invalid, skip!\n", i);
> -                     continue;
> -             }
> 
> -             if (rte_intr_efds_index_set(dev->intr_handle, i, vring.kickfd))
> -                     continue;
> -             VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
> +             vq = dev->data->rx_queues[i];
> +             memset(&vq->ev, 0, sizeof(vq->ev));
> +             vq->ev.events = EPOLLIN;
> +             vq->ev.data.fd = epoll_fd;
>       }
> 
>       if (rte_intr_nb_efd_set(dev->intr_handle, nb_rxq)) {
> @@ -756,6 +702,46 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>       return ret;
>  }
> 
> +static void
> +eth_vhost_configure_intr(struct rte_eth_dev *dev)
> +{
> +     int i;
> +
> +     VHOST_LOG(DEBUG, "Configure intr vec\n");
> +     for (i = 0; i < dev->data->nb_rx_queues; i++)
> +             eth_vhost_update_intr(dev, i);
> +}
> +
> +static void
> +eth_vhost_unconfigure_intr(struct rte_eth_dev *eth_dev)
> +{
> +     struct vhost_queue *vq;
> +     int i;
> +
> +     VHOST_LOG(DEBUG, "Unconfigure intr vec\n");
> +     for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
> +             vq = eth_dev->data->rx_queues[i];
> +             if (vq == NULL || vq->vid < 0)
> +                     continue;
> +
> +             rte_spinlock_lock(&vq->intr_lock);
> +
> +             /* Remove previous kickfd from proxy epoll */
> +             if (vq->kickfd >= 0) {
> +                     if (epoll_ctl(vq->ev.data.fd, EPOLL_CTL_DEL, vq->kickfd,
> &vq->ev) < 0) {
> +                             VHOST_LOG(DEBUG, "Failed to unregister %d from
> rxq-%d epoll: %s\n",
> +                                     vq->kickfd, i, strerror(errno));
> +                     } else {
> +                             VHOST_LOG(DEBUG, "Unregistered %d from rxq-%d
> epoll\n",
> +                                     vq->kickfd, i);
> +                     }
> +                     vq->kickfd = -1;
> +             }
> +
> +             rte_spinlock_unlock(&vq->intr_lock);
> +     }
> +}
> +
>  static void
>  update_queuing_status(struct rte_eth_dev *dev, bool wait_queuing)
>  {
> @@ -862,16 +848,8 @@ new_device(int vid)
>       internal->vid = vid;
>       if (rte_atomic32_read(&internal->started) == 1) {
>               queue_setup(eth_dev, internal);
> -
> -             if (dev_conf->intr_conf.rxq) {
> -                     if (eth_vhost_install_intr(eth_dev) < 0) {
> -                             VHOST_LOG(INFO,
> -                                     "Failed to install interrupt 
> handler.\n");
> -                                     return -1;
> -                     }
> -             }
> -     } else {
> -             VHOST_LOG(INFO, "RX/TX queues not exist yet\n");
> +             if (dev_conf->intr_conf.rxq)
> +                     eth_vhost_configure_intr(eth_dev);
>       }
> 
>       for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
> @@ -915,6 +893,7 @@ destroy_device(int vid)
> 
>       rte_atomic32_set(&internal->dev_attached, 0);
>       update_queuing_status(eth_dev, true);
> +     eth_vhost_unconfigure_intr(eth_dev);
> 
>       eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
> 
> @@ -943,55 +922,10 @@ destroy_device(int vid)
>       rte_spinlock_unlock(&state->lock);
> 
>       VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
> -     eth_vhost_uninstall_intr(eth_dev);
> 
>       rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
>  }
> 
> -static int
> -vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
> -{
> -     struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
> -     struct pmd_internal *internal = eth_dev->data->dev_private;
> -     struct vhost_queue *vq;
> -     struct rte_vhost_vring vring;
> -     int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
> -     int ret = 0;
> -
> -     /*
> -      * The vring kickfd may be changed after the new device notification.
> -      * Update it when the vring state is updated.
> -      */
> -     if (rx_idx >= 0 && rx_idx < eth_dev->data->nb_rx_queues &&
> -         rte_atomic32_read(&internal->dev_attached) &&
> -         rte_atomic32_read(&internal->started) &&
> -         dev_conf->intr_conf.rxq) {
> -             ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
> -             if (ret) {
> -                     VHOST_LOG(ERR, "Failed to get vring %d information.\n",
> -                                     vring_id);
> -                     return ret;
> -             }
> -
> -             if (rte_intr_efds_index_set(eth_dev->intr_handle, rx_idx,
> -                                                vring.kickfd))
> -                     return -rte_errno;
> -
> -             vq = eth_dev->data->rx_queues[rx_idx];
> -             if (!vq) {
> -                     VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
> -                     return -1;
> -             }
> -
> -             rte_spinlock_lock(&vq->intr_lock);
> -             if (vq->intr_enable)
> -                     ret = eth_vhost_update_intr(eth_dev, rx_idx);
> -             rte_spinlock_unlock(&vq->intr_lock);
> -     }
> -
> -     return ret;
> -}
> -
>  static int
>  vring_state_changed(int vid, uint16_t vring, int enable)
>  {
> @@ -1011,9 +945,8 @@ vring_state_changed(int vid, uint16_t vring, int
> enable)
>       /* won't be NULL */
>       state = vring_states[eth_dev->data->port_id];
> 
> -     if (enable && vring_conf_update(vid, eth_dev, vring))
> -             VHOST_LOG(INFO, "Failed to update vring-%d configuration.\n",
> -                       (int)vring);
> +     if (eth_dev->data->dev_conf.intr_conf.rxq && vring % 2)
> +             eth_vhost_update_intr(eth_dev, (vring - 1) >> 1);
> 
>       rte_spinlock_lock(&state->lock);
>       if (state->cur[vring] == enable) {
> @@ -1200,18 +1133,17 @@ eth_dev_start(struct rte_eth_dev *eth_dev)
>       struct pmd_internal *internal = eth_dev->data->dev_private;
>       struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
> 
> -     queue_setup(eth_dev, internal);
> -
> -     if (rte_atomic32_read(&internal->dev_attached) == 1) {
> -             if (dev_conf->intr_conf.rxq) {
> -                     if (eth_vhost_install_intr(eth_dev) < 0) {
> -                             VHOST_LOG(INFO,
> -                                     "Failed to install interrupt 
> handler.\n");
> -                                     return -1;
> -                     }
> -             }
> +     eth_vhost_uninstall_intr(eth_dev);
> +     if (dev_conf->intr_conf.rxq && eth_vhost_install_intr(eth_dev) < 0)
> {
> +             VHOST_LOG(ERR, "Failed to install interrupt handler.\n");
> +             return -1;
>       }
> 
> +     queue_setup(eth_dev, internal);
> +     if (rte_atomic32_read(&internal->dev_attached) == 1 &&
> +                     dev_conf->intr_conf.rxq)
> +             eth_vhost_configure_intr(eth_dev);
> +
>       rte_atomic32_set(&internal->started, 1);
>       update_queuing_status(eth_dev, false);
> 
> @@ -1266,6 +1198,8 @@ eth_dev_close(struct rte_eth_dev *dev)
>       rte_free(internal->iface_name);
>       rte_free(internal);
> 
> +     eth_vhost_uninstall_intr(dev);
> +
>       dev->data->dev_private = NULL;
> 
>       rte_free(vring_states[dev->data->port_id]);
> @@ -1293,6 +1227,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t
> rx_queue_id,
>       vq->mb_pool = mb_pool;
>       vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
>       rte_spinlock_init(&vq->intr_lock);
> +     vq->kickfd = -1;
>       dev->data->rx_queues[rx_queue_id] = vq;
> 
>       return 0;
> @@ -1315,6 +1250,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t
> tx_queue_id,
> 
>       vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
>       rte_spinlock_init(&vq->intr_lock);
> +     vq->kickfd = -1;
>       dev->data->tx_queues[tx_queue_id] = vq;
> 
>       return 0;
> --
> 2.39.2

Reviewed-by: Chenbo Xia <chenbo....@intel.com> 

Reply via email to