Hi, > Thanks. I'm suspecting we may need to instrument igb_rd32 at this > point. In order to trigger what you are seeing I am assuming the > device has been detached due to a read failure of some sort.
Okay, I added a printk to igb_rd32. And because no one calls this function directly (all access goes via the rd32/array_rd32 macros), I also print the name of the calling function. This should help greatly in tracing each read from the hardware to its consumer. Finally, I noticed that igb_update_stats() produced a lot of churn that is most likely unrelated. So I added a helper variable to make the output from this function go away. I installed this modified driver, rebooted, and removed / inserted the LAN cable until the error was present. As before, "ethtool" and "mii-tool" now said that the device is not there, while "ip link" showed the device as present. The full output of "journalctl -fk | grep igb" is 600 kB. So I put the whole file on Google Drive: https://drive.google.com/open?id=1p9cCT2d_EHnSHh29oS3AepUgFTKGFSeA I looked at the output to see patterns, e.g. with grep -n igb_get_cfg_done_i210 igb.error.txt grep -n __igb_shutdown igb.error.txt ... (and almost all other function names). I hoped to see patterns. But to my untrained eye, nothing looked out of order. 
(For reference, here is the debug patch) Index: linux-4.16/drivers/net/ethernet/intel/igb/igb_main.c =================================================================== --- linux-4.16.orig/drivers/net/ethernet/intel/igb/igb_main.c 2018-04-01 23:20:27.000000000 +0200 +++ linux-4.16/drivers/net/ethernet/intel/igb/igb_main.c 2018-04-26 10:36:09.625135952 +0200 @@ -759,7 +759,8 @@ } } -u32 igb_rd32(struct e1000_hw *hw, u32 reg) +int igb_rd32_silent = 0; +u32 igb_rd32(const char *func, struct e1000_hw *hw, u32 reg) { struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw); u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); @@ -769,6 +770,8 @@ return ~value; value = readl(&hw_addr[reg]); + if (!igb_rd32_silent) + printk("rd32 %s %08x %08x\n", func, reg, value); /* reads should not return all F's */ if (!(~value) && (!reg || !(~readl(hw_addr)))) { @@ -5935,6 +5938,7 @@ if (pci_channel_offline(pdev)) return; + igb_rd32_silent = 1; bytes = 0; packets = 0; @@ -6100,6 +6104,7 @@ adapter->stats.b2ospc += rd32(E1000_B2OSPC); adapter->stats.b2ogprc += rd32(E1000_B2OGPRC); } + igb_rd32_silent = 0; } static void igb_tsync_interrupt(struct igb_adapter *adapter) Index: linux-4.16/drivers/net/ethernet/intel/igb/e1000_regs.h =================================================================== --- linux-4.16.orig/drivers/net/ethernet/intel/igb/e1000_regs.h 2018-04-01 23:20:27.000000000 +0200 +++ linux-4.16/drivers/net/ethernet/intel/igb/e1000_regs.h 2018-04-26 10:34:24.332157000 +0200 @@ -370,7 +370,8 @@ struct e1000_hw; -u32 igb_rd32(struct e1000_hw *hw, u32 reg); +extern int igb_rd32_silent; +u32 igb_rd32(const char *fname, struct e1000_hw *hw, u32 reg); /* write operations, indexed using DWORDS */ #define wr32(reg, val) \ @@ -380,14 +381,14 @@ writel((val), &hw_addr[(reg)]); \ } while (0) -#define rd32(reg) (igb_rd32(hw, reg)) +#define rd32(reg) (igb_rd32(__func__, hw, reg)) #define wrfl() ((void)rd32(E1000_STATUS)) #define array_wr32(reg, offset, value) \ wr32((reg) + 
((offset) << 2), (value)) -#define array_rd32(reg, offset) (igb_rd32(hw, reg + ((offset) << 2))) +#define array_rd32(reg, offset) (igb_rd32(__func__, hw, reg + ((offset) << 2))) /* DMA Coalescing registers */ #define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */