Hi, On Wed, Sep 7, 2011 at 8:18 PM, Sean Bruno <sean...@yahoo-inc.com> wrote: > On Wed, 2011-09-07 at 16:19 -0700, Arnaud Lacombe wrote: >> Hi, >> >> On Mon, Sep 5, 2011 at 2:59 AM, Arnaud Lacombe <lacom...@gmail.com> wrote: >> > Hi folks, >> > >> > We have been trying to track down a bad mbuf management for about two >> > weeks on a customized 7.1 base. I have finally been able to reproduce >> > it with a stock FreeBSD 7-STABLE (kernel from r225276, userland from >> > 7.4). >> > >> > With the help of the attached patches, I have just been able to >> > trigger the following panic: >> > >> > panic: Corrupted unused flags, expected 0xffffffff00000000, got 0x0, flags >> > 0x3 >> > cpuid = 1 >> > Uptime: 3d10h5m3s >> > Cannot dump. No dump device defined >> > >> General form of the crash is: >> >> panic: Corrupted unused flags, expected 0xffffffff00000000, got >> 0xbabe0000000000, flags 0xbabe0000babe00 >> cpuid = 0 >> KDB: stack backtrace: >> db_trace_self_wrapper(c0874e29,0,c0835757,f4574c48,0,...) at >> db_trace_self_wrapper+0x26 >> panic(c0835757,0,ffffffff,0,babe00,...) at panic+0x10b >> igb_txeof(c6a25008,0,c0837083,5ea,17c,...) at igb_txeof+0x399 >> igb_msix_que(c6a2b800,0,c084d367,4b6,c69dd068,...) at igb_msix_que+0x7b >> ithread_loop(c6a29090,f4574d38,c084d0db,31c,c6a16828,...) at >> ithread_loop+0xc3 >> fork_exit(c061d520,c6a29090,f4574d38) at fork_exit+0xa6 >> fork_trampoline() at fork_trampoline+0x8 >> --- trap 0, eip = 0, esp = 0xf4574d70, ebp = 0 --- >> Uptime: 1m42s >> >> It happens particularly easily when the box receives wall of SYN >> (about 1000 cnx attempts at once) every 5s or so. >> >> - Arnaud >> >> > > > I swear that we squashed this last year on "Yahoo BSD" last year with > Jack's help. > > I know that we set: > hw.igb.num_queues="1" > hw.igb.rxd="4096" > hw.igb.txd="4096" > Actually, as I pointed out in my original mail, when we had the issue end of last year, lowering the number of queue to 1 fixed the problem. As it did for PR/155597. However, we are now seeing this with em(4), and eventually vr(4). So this might not be igb(4) specific.
Anyhow, I'll add your patch to my test kernel. Thanks, - Arnaud > In addition to some txeof() handling patches ... Jack can see if this is > still relevant in the freebsd7 universe. Note that Yahoo is "special" > and we pluck and chuck code/drivers/whatever at will so I don't know how > this code will apply to stable-7 or if it has been applied already. > > Note that this code "rage-quits" the EIAM/EIAC auto ack strategy and > tries to handle the OACTIVE state handling better. I hope these clues > find you well. > > --- //depot/yahoo/ybsd_7/src/sys/dev/e1000/if_igb.c 2010-11-29 > 20:47:23.000000000 0000 > +++ /home/seanbru/ybsd_7/src/sys/dev/e1000/if_igb.c 2010-11-29 > 20:47:23.000000000 0000 > @@ -30,7 +30,7 @@ > POSSIBILITY OF SUCH DAMAGE. > > > ******************************************************************************/ > -/*$FreeBSD: src/sys/dev/e1000/if_igb.c,v 1.3.2.13 2010/11/27 01:09:54 > jfv Exp $*/ > +/*$FreeBSD: stable/7/sys/dev/e1000/if_igb.c 216465 2010-12-15 22:48:44Z > jfv $*/ > > > #ifdef HAVE_KERNEL_OPTION_HEADERS > @@ -318,10 +318,6 @@ > static bool igb_header_split = FALSE; > TUNABLE_INT("hw.igb.hdr_split", &igb_header_split); > > -/* > -** This will autoconfigure based on > -** the number of CPUs if left at 0. > -*/ > static int igb_num_queues = 0; > TUNABLE_INT("hw.igb.num_queues", &igb_num_queues); > > @@ -784,10 +780,14 @@ > return; > > /* Call cleanup if number of TX descriptors low */ > +#if 0 > if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) > igb_txeof(txr); > +#endif > > while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { > + if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) > + igb_txeof(txr); > if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) { > ifp->if_drv_flags |= IFF_DRV_OACTIVE; > break; > @@ -1162,10 +1162,10 @@ > IGB_TX_LOCK(txr); > if (igb_txeof(txr)) > more = TRUE; > - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) > - igb_start_locked(txr, ifp); > + /*if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) Pointless as > igb_start_locked() checks this right off the bat*/ > + igb_start_locked(txr, ifp); > IGB_TX_UNLOCK(txr); > - if (more) { > + if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) { > taskqueue_enqueue(que->tq, &que->que_task); > return; > } > @@ -1298,15 +1298,20 @@ > struct rx_ring *rxr = que->rxr; > u32 newitr = 0; > bool more_tx, more_rx; > + struct ifnet *ifp = adapter->ifp; > > +#if 0 > E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); > + newitr = E1000_READ_REG(&adapter->hw, E1000_EICR); > +#endif > + E1000_WRITE_REG(&adapter->hw, E1000_EICR, que->eims); > ++que->irqs; > > IGB_TX_LOCK(txr); > more_tx = igb_txeof(txr); > IGB_TX_UNLOCK(txr); > > - more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); > + more_rx = igb_rxeof(que, -1, NULL); > > if (igb_enable_aim == FALSE) > goto no_calc; > @@ -1361,7 +1366,7 @@ > > no_calc: > /* Schedule a clean task if needed*/ > - if (more_tx || more_rx) > + if (more_tx || more_rx || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) > taskqueue_enqueue(que->tq, &que->que_task); > else > /* Reenable this interrupt */ > @@ -1382,6 +1387,7 @@ > struct adapter *adapter = arg; > u32 icr; > > + E1000_WRITE_REG(&adapter->hw, E1000_EICR, adapter->link_mask); > ++adapter->link_irq; > icr = E1000_READ_REG(&adapter->hw, E1000_ICR); > if (!(icr & E1000_ICR_LSC)) > @@ -1535,6 +1541,14 @@ > if (m_head->m_flags & M_VLANTAG) > cmd_type_len |= E1000_ADVTXD_DCMD_VLE; > > +/* > + * We just did this in before invocation, seems completely > + * redundant, igb_handle_queue -> igb_txeof > + * Pretty sure this is impossible as we check for the > + * IGB_TX_CLEANUP_THRESHOLD in igb_start_locked() which happens > + * before this func in invoked > + */ > +#if 0 > /* > * Force a cleanup if number of TX descriptors > * available hits the threshold > @@ -1547,6 +1561,7 @@ > return (ENOBUFS); > } > } > +#endif > > /* > * Map the packet for DMA. > @@ -2082,6 +2097,7 @@ > que->eims = E1000_EICR_TX_QUEUE0 << i; > else > que->eims = 1 << vector; > + device_printf(adapter->dev, "que %d eims= 0x%x\n", i, > que->eims); > /* > ** Bind the msix vector, and thus the > ** rings to the corresponding cpu. > @@ -3264,8 +3280,6 @@ > case ETHERTYPE_IPV6: > ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); > ip_hlen = sizeof(struct ip6_hdr); > - if (mp->m_len < ehdrlen + ip_hlen) > - return (FALSE); > ipproto = ip6->ip6_nxt; > type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; > break; > @@ -4461,6 +4475,24 @@ > static void > igb_enable_intr(struct adapter *adapter) > { > + uint32_t ims, regval; > + > + if (adapter->msix_mem) { > + ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC; > + regval = E1000_READ_REG(&adapter->hw, E1000_EIAC); > +#if 0 > + E1000_WRITE_REG(&adapter->hw, E1000_EIAC, regval | > adapter->eims_mask); > + regval = E1000_READ_REG(&adapter->hw, E1000_EIAM); > + E1000_WRITE_REG(&adapter->hw, E1000_EIAM, regval | > adapter->eims_mask); > +#endif > + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, > adapter->eims_mask); > + E1000_WRITE_REG(&adapter->hw, E1000_IMS, ims); > + } else { > + E1000_WRITE_REG(&adapter->hw, E1000_IMS, > IMS_ENABLE_MASK | E1000_IMS_DRSTA); > + E1000_WRITE_REG(&adapter->hw, E1000_IAM, > IMS_ENABLE_MASK | E1000_IMS_DRSTA); > + } > + > +#if 0 > /* With RSS set up what to auto clear */ > if (adapter->msix_mem) { > E1000_WRITE_REG(&adapter->hw, E1000_EIAC, > @@ -4475,6 +4507,7 @@ > E1000_WRITE_REG(&adapter->hw, E1000_IMS, > IMS_ENABLE_MASK); > } > +#endif > E1000_WRITE_FLUSH(&adapter->hw); > > return; > @@ -4483,11 +4516,33 @@ > static void > igb_disable_intr(struct adapter *adapter) > { > + uint32_t regval; > + > + /* > + * we need to be careful when disabling interrupts. The VFs > are also > + * mapped into these registers and so clearing the bits can > cause > + * issues on the VF drivers so we only need to clear what we > set > + */ > + if (adapter->msix_mem) { > + regval = E1000_READ_REG(&adapter->hw, E1000_EIAM); > +#if 0 > + E1000_WRITE_REG(&adapter->hw, E1000_EIAM, regval & > ~adapter->eims_mask); > + regval = E1000_READ_REG(&adapter->hw, E1000_EIAC); > + E1000_WRITE_REG(&adapter->hw, E1000_EIAC, regval & > ~adapter->eims_mask); > +#endif > + E1000_WRITE_REG(&adapter->hw, E1000_EIMC, > adapter->eims_mask); > + } > + > + E1000_WRITE_REG(&adapter->hw, E1000_IAM, 0); > + E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); > + > +#if 0 > if (adapter->msix_mem) { > E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); > E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); > } > E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); > +#endif > E1000_WRITE_FLUSH(&adapter->hw); > return; > } > @@ -4876,8 +4931,8 @@ > struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); > struct e1000_hw_stats *stats = adapter->stats; > > - struct sysctl_oid *stat_node, *queue_node, *int_node, > *host_node; > - struct sysctl_oid_list *stat_list, *queue_list, *int_list, > *host_list; > + struct sysctl_oid *stat_node, *queue_node, *int_node, > *host_node, *debug_node; > + struct sysctl_oid_list *stat_list, *queue_list, *int_list, > *host_list, *debug_list; > > #define QUEUE_NAME_LEN 32 > char namebuf[QUEUE_NAME_LEN]; > @@ -5245,6 +5300,27 @@ > SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed", > CTLFLAG_RD, &stats->hrmpc, > "Header Redirection Missed Packet Count"); > + > + > + debug_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "debug", > + CTLFLAG_RD, NULL, > + "Debug info"); > + > + debug_list = SYSCTL_CHILDREN(debug_node); > + > + txr = adapter->tx_rings; > + SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "desc_avail", > + CTLFLAG_RD, (u_int *)(uintptr_t)&txr->tx_avail, > 0, > + ""); > + > + SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "next_to_clean", > + CTLFLAG_RD, &txr->next_to_clean, 0, > + ""); > + > + SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "if_flags", > + CTLFLAG_RD, &adapter->ifp->if_drv_flags, 0, > + ""); > + > } > > > > _______________________________________________ freebsd-net@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-net To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"