Sunday, May 12, 2019 3:15 PM, Shahaf Shuler: > Subject: Re: [dpdk-dev] [PATCH v2] net/mlx5: fix device removal handler for > multiport device > > Sunday, May 12, 2019 11:32 AM, Viacheslav Ovsiienko: > > Subject: [dpdk-dev] [PATCH v2] net/mlx5: fix device removal handler > > for multiport device > > > > The IBV_EVENT_DEVICE_FATAL event is generated by the driver once for the > > entire multiport Infiniband device, not for each existing port. > > The port index is zero, which causes the device removal event to be > > dropped. We should invoke the removal event processing routine for each > > port we have installed a handler for. > > > > Fixes: 028b2a28c3cb ("net/mlx5: update event handler for multiport IB > > devices") > > > > Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com> > > Acked-by: Shahaf Shuler <shah...@mellanox.com> > > Thomas, Ferruh, > This one is a critical fix for mlx5. Without it, failsafe support on > Azure will break. > > Could you consider integrating it?
Applied it also to next-net-mlx, thanks. > > > --- > > v2: - address comments > > - more detailed debug messages in the event handler > > - removed port specific IBV_EVENT_DEVICE_FATAL handling code > > > > v1: > > > https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch > > > es.dpdk.org%2Fpatch%2F53371%2F&data=02%7C01%7Cshahafs%40mel > > > lanox.com%7C46fcede947654c45106e08d6d6b462e5%7Ca652971c7d2e4d9ba > > > 6a4d149256f461b%7C0%7C0%7C636932467570850420&sdata=%2FN%2B > > D0OWf5y0hgtlvWj7om9qZrQPPIbmGXDIfsgqeUtY%3D&reserved=0 > > > > drivers/net/mlx5/mlx5_ethdev.c | 77 > > ++++++++++++++++++++++++++++++++++-------- > > 1 file changed, 62 insertions(+), 15 deletions(-) > > > > diff --git a/drivers/net/mlx5/mlx5_ethdev.c > > b/drivers/net/mlx5/mlx5_ethdev.c index 80ee98f..a8a7ece 100644 > > --- a/drivers/net/mlx5/mlx5_ethdev.c > > +++ b/drivers/net/mlx5/mlx5_ethdev.c > > @@ -1116,6 +1116,35 @@ int mlx5_fw_version_get(struct rte_eth_dev > > *dev, char *fw_ver, size_t fw_size) } > > > > /** > > + * Handle asynchronous removal event for entire multiport device. > > + * > > + * @param sh > > + * Infiniband device shared context. > > + */ > > +static void > > +mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh) { > > + uint32_t i; > > + > > + for (i = 0; i < sh->max_port; ++i) { > > + struct rte_eth_dev *dev; > > + > > + if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) { > > + /* > > + * Or not existing port either no > > + * handler installed for this port. > > + */ > > + continue; > > + } > > + dev = &rte_eth_devices[sh->port[i].ih_port_id]; > > + assert(dev); > > + if (dev->data->dev_conf.intr_conf.rmv) > > + _rte_eth_dev_callback_process > > + (dev, RTE_ETH_EVENT_INTR_RMV, NULL); > > + } > > +} > > + > > +/** > > * Handle shared asynchronous events the NIC (removal event > > * and link status change). Supports multiport IB device. 
> > * > > @@ -1137,21 +1166,46 @@ int mlx5_fw_version_get(struct rte_eth_dev > > *dev, char *fw_ver, size_t fw_size) > > break; > > /* Retrieve and check IB port index. */ > > tmp = (uint32_t)event.element.port_num; > > - assert(tmp && (tmp <= sh->max_port)); > > - if (!tmp || > > - tmp > sh->max_port || > > - sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) { > > + if (!tmp && event.event_type == > > IBV_EVENT_DEVICE_FATAL) { > > /* > > - * Invalid IB port index or no handler > > - * installed for this port. > > + * The DEVICE_FATAL event is called once for > > + * entire device without port specifying. > > + * We should notify all existing ports. > > */ > > mlx5_glue->ack_async_event(&event); > > + mlx5_dev_interrupt_device_fatal(sh); > > + continue; > > + } > > + assert(tmp && (tmp <= sh->max_port)); > > + if (!tmp) { > > + /* Unsupported devive level event. */ > > + mlx5_glue->ack_async_event(&event); > > + DRV_LOG(DEBUG, > > + "unsupported common event (type %d)", > > + event.event_type); > > + continue; > > + } > > + if (tmp > sh->max_port) { > > + /* Invalid IB port index. */ > > + mlx5_glue->ack_async_event(&event); > > + DRV_LOG(DEBUG, > > + "cannot handle an event (type %d)" > > + "due to invalid IB port index (%u)", > > + event.event_type, tmp); > > + continue; > > + } > > + if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) { > > + /* No handler installed. */ > > + mlx5_glue->ack_async_event(&event); > > + DRV_LOG(DEBUG, > > + "cannot handle an event (type %d)" > > + "due to no handler installed for port %u", > > + event.event_type, tmp); > > continue; > > } > > /* Retrieve ethernet device descriptor. 
*/ > > tmp = sh->port[tmp - 1].ih_port_id; > > dev = &rte_eth_devices[tmp]; > > - tmp = 0; > > assert(dev); > > if ((event.event_type == IBV_EVENT_PORT_ACTIVE || > > event.event_type == IBV_EVENT_PORT_ERR) && @@ - > > 1165,15 +1219,8 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, > > char *fw_ver, size_t fw_size) > > (dev, RTE_ETH_EVENT_INTR_LSC, NULL); > > continue; > > } > > - if (event.event_type == IBV_EVENT_DEVICE_FATAL && > > - dev->data->dev_conf.intr_conf.rmv) { > > - mlx5_glue->ack_async_event(&event); > > - _rte_eth_dev_callback_process > > - (dev, RTE_ETH_EVENT_INTR_RMV, NULL); > > - continue; > > - } > > DRV_LOG(DEBUG, > > - "port %u event type %d on not handled", > > + "port %u cannot handle an unknown event (type > > %d)", > > dev->data->port_id, event.event_type); > > mlx5_glue->ack_async_event(&event); > > } > > -- > > 1.8.3.1