The IBV_EVENT_DEVICE_FATAL event is generated by the driver once for the entire multiport Infiniband device, not once for each existing port. The port index in the event is zero, which causes the device removal event to be dropped. We should invoke the removal event processing routine for each port for which we have installed a handler.
Fixes: 028b2a28c3cb ("net/mlx5: update event handler for multiport IB devices") Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com> --- v2: - address comments - more detailed debug messages in the event handler - removed port specific IBV_EVENT_DEVICE_FATAL handling code v1: http://patches.dpdk.org/patch/53371/ drivers/net/mlx5/mlx5_ethdev.c | 77 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 80ee98f..a8a7ece 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -1116,6 +1116,35 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) } /** + * Handle asynchronous removal event for entire multiport device. + * + * @param sh + * Infiniband device shared context. + */ +static void +mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh) +{ + uint32_t i; + + for (i = 0; i < sh->max_port; ++i) { + struct rte_eth_dev *dev; + + if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) { + /* + * Or not existing port either no + * handler installed for this port. + */ + continue; + } + dev = &rte_eth_devices[sh->port[i].ih_port_id]; + assert(dev); + if (dev->data->dev_conf.intr_conf.rmv) + _rte_eth_dev_callback_process + (dev, RTE_ETH_EVENT_INTR_RMV, NULL); + } +} + +/** * Handle shared asynchronous events the NIC (removal event * and link status change). Supports multiport IB device. * @@ -1137,21 +1166,46 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) break; /* Retrieve and check IB port index. */ tmp = (uint32_t)event.element.port_num; - assert(tmp && (tmp <= sh->max_port)); - if (!tmp || - tmp > sh->max_port || - sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) { + if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) { /* - * Invalid IB port index or no handler - * installed for this port. 
+ * The DEVICE_FATAL event is called once for + * entire device without port specifying. + * We should notify all existing ports. */ mlx5_glue->ack_async_event(&event); + mlx5_dev_interrupt_device_fatal(sh); + continue; + } + assert(tmp && (tmp <= sh->max_port)); + if (!tmp) { + /* Unsupported devive level event. */ + mlx5_glue->ack_async_event(&event); + DRV_LOG(DEBUG, + "unsupported common event (type %d)", + event.event_type); + continue; + } + if (tmp > sh->max_port) { + /* Invalid IB port index. */ + mlx5_glue->ack_async_event(&event); + DRV_LOG(DEBUG, + "cannot handle an event (type %d)" + "due to invalid IB port index (%u)", + event.event_type, tmp); + continue; + } + if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) { + /* No handler installed. */ + mlx5_glue->ack_async_event(&event); + DRV_LOG(DEBUG, + "cannot handle an event (type %d)" + "due to no handler installed for port %u", + event.event_type, tmp); continue; } /* Retrieve ethernet device descriptor. */ tmp = sh->port[tmp - 1].ih_port_id; dev = &rte_eth_devices[tmp]; - tmp = 0; assert(dev); if ((event.event_type == IBV_EVENT_PORT_ACTIVE || event.event_type == IBV_EVENT_PORT_ERR) && @@ -1165,15 +1219,8 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) (dev, RTE_ETH_EVENT_INTR_LSC, NULL); continue; } - if (event.event_type == IBV_EVENT_DEVICE_FATAL && - dev->data->dev_conf.intr_conf.rmv) { - mlx5_glue->ack_async_event(&event); - _rte_eth_dev_callback_process - (dev, RTE_ETH_EVENT_INTR_RMV, NULL); - continue; - } DRV_LOG(DEBUG, - "port %u event type %d on not handled", + "port %u cannot handle an unknown event (type %d)", dev->data->port_id, event.event_type); mlx5_glue->ack_async_event(&event); } -- 1.8.3.1