On Thu, 29 Aug 2024 18:15:30 +0200 Maxime Chevallier wrote: > @@ -582,15 +591,12 @@ static void fs_timeout_work(struct work_struct *work) > > dev->stats.tx_errors++; > > - spin_lock_irqsave(&fep->lock, flags); > - > - if (dev->flags & IFF_UP) { > - phy_stop(dev->phydev); > - (*fep->ops->stop)(dev); > - (*fep->ops->restart)(dev); > - } > + rtnl_lock();
so we take rtnl_lock here.. > + phylink_stop(fep->phylink); > + phylink_start(fep->phylink); > + rtnl_unlock(); > > - phy_start(dev->phydev); > + spin_lock_irqsave(&fep->lock, flags); > wake = fep->tx_free >= MAX_SKB_FRAGS && > !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY); > spin_unlock_irqrestore(&fep->lock, flags); > @@ -717,19 +686,18 @@ static int fs_enet_close(struct net_device *dev) > unsigned long flags; > > netif_stop_queue(dev); > - netif_carrier_off(dev); > napi_disable(&fep->napi); > cancel_work_sync(&fep->timeout_work); ..and cancel_work_sync() under rtnl_lock here? IDK if removing the the "dev->flags & IFF_UP" check counts as meaningfully making it worse, but we're going in the wrong direction. The _sync() has to go, and the timeout work needs to check if device has been closed under rtnl_lock ? > - phy_stop(dev->phydev); > + phylink_stop(fep->phylink); > > spin_lock_irqsave(&fep->lock, flags); > spin_lock(&fep->tx_lock); > (*fep->ops->stop)(dev); > spin_unlock(&fep->tx_lock); > spin_unlock_irqrestore(&fep->lock, flags); > + phylink_disconnect_phy(fep->phylink); > > /* release any irqs */ > - phy_disconnect(dev->phydev); > free_irq(fep->interrupt, dev); > > return 0;