Add support for PCI Error Recovery to the tg3 Ethernet
device driver. The general principles of operation are
described in Documentation/pci-error-recovery.txt.
Other drivers with a similar structure include e100,
e1000, ixgb, s2io, ipr, sym53c8xx_2, and lpfc.

Signed-off-by: Linas Vepstas <[EMAIL PROTECTED]>
Cc: Michael Chan <[EMAIL PROTECTED]>

----

Michael, you are listed as the tg3 maintainer; could you
please forward upstream if you agree?  

Tested on the PCI-E version of this adapter, on power6, 
for 85 (artificial) error injections (overnight) while
ftp'ing dvd iso images over the link. Worked well.

 drivers/net/tg3.c |  108 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 1 deletion(-)

Index: linux-2.6.22-git2/drivers/net/tg3.c
===================================================================
--- linux-2.6.22-git2.orig/drivers/net/tg3.c    2007-07-17 11:07:30.000000000 -0500
+++ linux-2.6.22-git2/drivers/net/tg3.c 2007-07-18 15:10:09.000000000 -0500
@@ -64,7 +64,7 @@
 
 #define DRV_MODULE_NAME                "tg3"
 #define PFX DRV_MODULE_NAME    ": "
-#define DRV_MODULE_VERSION     "3.77"
+#define DRV_MODULE_VERSION     "3.77-a"
 #define DRV_MODULE_RELDATE     "May 31, 2007"
 
 #define TG3_DEF_MAC_MODE       0
@@ -12126,11 +12126,117 @@ out:
        return err;
 }
 
+/**
+ * tg3_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * Called after a PCI bus error affecting this device has been detected.
+ * Returns PCI_ERS_RESULT_DISCONNECT on permanent failure, else NEED_RESET.
+ */
+static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev,
+                                               pci_channel_state_t state)
+{
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct tg3 *tp = netdev_priv(netdev);
+       struct device *dev = &netdev->dev;
+
+       dev_info(dev, "PCI I/O error detected on %s\n", netdev->name);
+
+       if (!netif_running(netdev))     /* interface down: nothing to quiesce */
+               return PCI_ERS_RESULT_NEED_RESET;
+
+       /* Want to make sure that the reset task doesn't run */
+       cancel_work_sync(&tp->reset_task);
+       tg3_netif_stop(tp);
+       del_timer_sync(&tp->timer);     /* re-armed in tg3_io_resume() */
+       netif_device_detach(netdev);
+       pci_disable_device(pdev);       /* re-enabled in tg3_io_slot_reset() */
+
+       if (state == pci_channel_io_perm_failure) {
+               /* avoid hang in dev_close() with rtnl_lock held */
+               netif_poll_enable(netdev);
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * tg3_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot. At this point
+ * the card has experienced a hard reset, followed by fixups by BIOS, and
+ * has its config space set up identically to what it was at cold boot.
+ * Returns RECOVERED on success, DISCONNECT if the device cannot restart.
+ */
+static pci_ers_result_t tg3_io_slot_reset(struct pci_dev *pdev)
+{
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct tg3 *tp = netdev_priv(netdev);
+       int err;
+
+       if (!netif_running(netdev))     /* interface down: nothing to restart */
+               return PCI_ERS_RESULT_RECOVERED;
+
+       if (pci_enable_device(pdev)) {  /* disabled in tg3_io_error_detected() */
+               printk(KERN_ERR "tg3: %s: "
+                      "Cannot re-enable PCI device after reset.\n", netdev->name);
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       pci_set_master(pdev);
+       pci_restore_state(tp->pdev);
+       netif_device_attach(netdev);    /* detached in tg3_io_error_detected() */
+
+       tg3_full_lock(tp, 0);
+       tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
+       err = tg3_restart_hw(tp, 1);
+       tg3_full_unlock(tp);
+       if (err) {
+               printk(KERN_ERR "tg3: %s: "
+                      "Cannot restart hardware after reset.\n", netdev->name);
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * tg3_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells
+ * us that it's OK to resume normal operation.
+ */
+static void tg3_io_resume(struct pci_dev *pdev)
+{
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct tg3 *tp = netdev_priv(netdev);
+
+       if (!netif_running(netdev))     /* interface down: nothing to resume */
+               return;
+
+       netif_wake_queue(netdev);
+
+       tp->timer.expires = jiffies + tp->timer_offset;
+       add_timer(&tp->timer);  /* re-arm timer deleted in tg3_io_error_detected() */
+
+       tg3_netif_start(tp);    /* counterpart of the earlier tg3_netif_stop() */
+}
+
+static struct pci_error_handlers tg3_err_handler = {   /* PCI error recovery callbacks */
+       .error_detected = tg3_io_error_detected,        /* PCI error detected */
+       .slot_reset = tg3_io_slot_reset,                /* after the pci bus has been reset */
+       .resume = tg3_io_resume,                        /* traffic can start flowing again */
+};
+
 static struct pci_driver tg3_driver = {
        .name           = DRV_MODULE_NAME,
        .id_table       = tg3_pci_tbl,
        .probe          = tg3_init_one,
        .remove         = __devexit_p(tg3_remove_one),
+       .err_handler    = &tg3_err_handler,     /* PCI error recovery */
        .suspend        = tg3_suspend,
        .resume         = tg3_resume
 };
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to