Add support for PCI Error Recovery for the tg3 ethernet device driver. The general principles of operation are described in Documentation/pci-error-recovery.txt. Other drivers having a similar structure include e100, e1000, ixgb, s2io, ipr, sym53c8xx_2, and lpfc.
Signed-off-by: Linas Vepstas <[EMAIL PROTECTED]> Cc: Michael Chan <[EMAIL PROTECTED]> ---- Michael, you are listed as the tg3 maintainer; could you please forward upstream if you agree? Tested on the PCI-E version of this adapter, on power6, for 85 (artificial) error injections (overnight) while ftp'ing dvd iso images over the link. Worked well. drivers/net/tg3.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) Index: linux-2.6.22-git2/drivers/net/tg3.c =================================================================== --- linux-2.6.22-git2.orig/drivers/net/tg3.c 2007-07-17 11:07:30.000000000 -0500 +++ linux-2.6.22-git2/drivers/net/tg3.c 2007-07-18 15:10:09.000000000 -0500 @@ -64,7 +64,7 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.77" +#define DRV_MODULE_VERSION "3.77-a" #define DRV_MODULE_RELDATE "May 31, 2007" #define TG3_DEF_MAC_MODE 0 @@ -12126,11 +12126,117 @@ out: return err; } +/** + * tg3_io_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state + * + * This function is called after a PCI bus error affecting + * this device has been detected. 
+ */ +static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct tg3 *tp = netdev_priv(netdev); + struct device *dev = &netdev->dev; + + dev_info(dev, "PCI I/O error detected on %s\n", netdev->name); + + if (!netif_running(netdev)) + return PCI_ERS_RESULT_NEED_RESET; + + /* Want to make sure that the reset task doesn't run */ + cancel_work_sync(&tp->reset_task); + tg3_netif_stop(tp); + del_timer_sync(&tp->timer); + netif_device_detach(netdev); + pci_disable_device(pdev); + + if (state == pci_channel_io_perm_failure) { + /* avoid hang in dev_close() with rtnl_lock held */ + netif_poll_enable(netdev); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; +} + +/** + * tg3_io_slot_reset - called after the pci bus has been reset. + * @pdev: Pointer to PCI device + * + * Restart the card from scratch, as if from a cold-boot. + * At this point, the card has experienced a hard reset, + * followed by fixups by BIOS, and has its config space + * set up identically to what it was at cold boot. + */ +static pci_ers_result_t tg3_io_slot_reset(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct tg3 *tp = netdev_priv(netdev); + int err; + + if (!netif_running(netdev)) + return PCI_ERS_RESULT_RECOVERED; + + if (pci_enable_device(pdev)) { + printk(KERN_ERR "tg3: %s: " + "Cannot re-enable PCI device after reset.\n", netdev->name); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(tp->pdev); + netif_device_attach(netdev); + + tg3_full_lock(tp, 0); + tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE; + err = tg3_restart_hw(tp, 1); + tg3_full_unlock(tp); + if (err) { + printk(KERN_ERR "tg3: %s: " + "Cannot restart hardware after reset.\n", netdev->name); + return PCI_ERS_RESULT_DISCONNECT; + } + + return PCI_ERS_RESULT_RECOVERED; +} + +/** + * tg3_io_resume - called when traffic can start flowing again. 
+ * @pdev: Pointer to PCI device + * + * This callback is called when the error recovery driver tells + * us that it's OK to resume normal operation. + */ +static void tg3_io_resume(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct tg3 *tp = netdev_priv(netdev); + + if (!netif_running(netdev)) + return; + + netif_wake_queue(netdev); + + tp->timer.expires = jiffies + tp->timer_offset; + add_timer(&tp->timer); + + tg3_netif_start(tp); +} + +static struct pci_error_handlers tg3_err_handler = { + .error_detected = tg3_io_error_detected, + .slot_reset = tg3_io_slot_reset, + .resume = tg3_io_resume, +}; + static struct pci_driver tg3_driver = { .name = DRV_MODULE_NAME, .id_table = tg3_pci_tbl, .probe = tg3_init_one, .remove = __devexit_p(tg3_remove_one), + .err_handler = &tg3_err_handler, .suspend = tg3_suspend, .resume = tg3_resume }; - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html