> If the hardware supports it, the best way to deal with it is to set up
> the driver so that it only ever deals in single pages.
I am working on fixing the driver to support NETIF_F_SG and have changed how it receives packets to follow how the e1000 driver does it. Here is where I am at: When I get the first part of the frame, I allocate an skb for the packet. I call dev->page = alloc_page(GFP_ATOMIC) to allocate a page for the 4080 bytes coming from the MAL. I then setup a DMA mapping for that page to get the data out of the MAL (the original code simply used dma_map_single, but I need a page). Once the DMA map has been setup and data transferred, I call skb_fill_page_desc() to put the data into the skb. I then wrote a function called emac_consume_page, which unmaps the DMA mapping, frees the page, and updates the lengths in the skb. The relevant source code is at the end of this email. My problem is this: When I run this code, it appears to create the fragmented packet just fine, but when it passes it up the stack, the kernel spits out these bugs, one after another: BUG: Bad page state in process swapper pfn:0ee9b page:c051f360 flags:(null) count:-3 mapcount:0 mapping:(null) index:766 Call Trace: [c032bc30] [c0006ef0] show_stack+0x44/0x16c (unreliable) [c032bc70] [c006c438] bad_page+0x94/0x130 [c032bc90] [c006d4a0] get_page_from_freelist+0x458/0x4d4 [c032bd20] [c006d5f4] __alloc_pages_nodemask+0xd8/0x4f8 [c032bda0] [c01a1174] emac_poll_rx+0x300/0x9c8 [c032bdf0] [c019cb64] mal_poll+0xa8/0x1ec [c032be20] [c01cf218] net_rx_action+0x9c/0x1b4 [c032be50] [c0039678] __do_softirq+0xc4/0x148 [c032be90] [c0004d18] do_softirq+0x78/0x80 [c032bea0] [c0039264] irq_exit+0x64/0x7c [c032beb0] [c0005210] do_IRQ+0x9c/0xb4 [c032bed0] [c000fa7c] ret_from_except+0x0/0x18 [c032bf90] [c000808c] cpu_idle+0xdc/0xec [c032bfb0] [c00028fc] rest_init+0x70/0x84 [c032bfc0] [c02e0864] start_kernel+0x240/0x2c4 [c032bff0] [c0002254] start_here+0x44/0xb0 BUG: Bad page state in process swapper pfn:0ee8c page:c051f180 flags:(null) count:-3 mapcount:0 mapping:(null) index:757 Call Trace: [c032bc30] [c0006ef0] 
show_stack+0x44/0x16c (unreliable) [c032bc70] [c006c438] bad_page+0x94/0x130 [c032bc90] [c006d4a0] get_page_from_freelist+0x458/0x4d4 [c032bd20] [c006d5f4] __alloc_pages_nodemask+0xd8/0x4f8 [c032bda0] [c01a1174] emac_poll_rx+0x300/0x9c8 [c032bdf0] [c019cb64] mal_poll+0xa8/0x1ec [c032be20] [c01cf218] net_rx_action+0x9c/0x1b4 [c032be50] [c0039678] __do_softirq+0xc4/0x148 [c032be90] [c0004d18] do_softirq+0x78/0x80 [c032bea0] [c0039264] irq_exit+0x64/0x7c [c032beb0] [c0005210] do_IRQ+0x9c/0xb4 [c032bed0] [c000fa7c] ret_from_except+0x0/0x18 [c032bf90] [c000808c] cpu_idle+0xdc/0xec [c032bfb0] [c00028fc] rest_init+0x70/0x84 [c032bfc0] [c02e0864] start_kernel+0x240/0x2c4 [c032bff0] [c0002254] start_here+0x44/0xb0 I know that I am missing something when it comes to allocating the pages for the fragments, but when I compare my methodology to the e1000 driver, they appear to be functionally the same? Any ideas? I can send the entire source file for the driver if needs be. Thanks! Jonathan Here is the source: static int emac_poll_rx(void *param, int budget) { ... 
/* Other code is here */ push_packet: skb->dev = dev->ndev; skb->protocol = eth_type_trans(skb, dev->ndev); emac_rx_csum(dev, skb, ctrl); if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) ++dev->estats.rx_dropped_stack; next: ++dev->stats.rx_packets; skip: dev->stats.rx_bytes += len; slot = (slot + 1) % NUM_RX_BUFF; --budget; ++received; continue; sg: if (ctrl & MAL_RX_CTRL_FIRST) { BUG_ON(dev->rx_sg_skb); if (unlikely(emac_alloc_rx_skb2(dev, slot, GFP_ATOMIC))) { DBG(dev, "rx OOM %d (%d) (%d)" NL, slot, dev->rx_skb_size, len); ++dev->estats.rx_dropped_oom; emac_recycle_rx_skb(dev, slot, 0); } else { dev->rx_sg_skb = skb; skb_fill_page_desc(dev->rx_sg_skb, 0, dev->page, 0, len); emac_consume_page(dev, len, slot); dev->rx_sg_skb->len += ETH_HLEN; } } else if (!emac_rx_sg_append(dev, slot) && (ctrl & MAL_RX_CTRL_LAST)) { skb = dev->rx_sg_skb; dev->rx_sg_skb = NULL; ctrl &= EMAC_BAD_RX_MASK; if (unlikely(ctrl && ctrl != EMAC_RX_TAH_BAD_CSUM)) { emac_parse_rx_error(dev, ctrl); ++dev->estats.rx_dropped_error; dev_kfree_skb(skb); len = 0; } else goto push_packet; } ... /* Other code is here */ } /* end of emac_poll_rx */ static inline int emac_alloc_rx_skb2(struct emac_instance *dev, int slot, gfp_t flags) { struct sk_buff *skb = alloc_skb(242, flags); if (unlikely(!skb)) return -ENOMEM; dev->rx_skb[slot] = skb; dev->rx_desc[slot].data_len = 0; dev->page = alloc_page(flags); DBG(dev, "emac_alloc_skb2: page %x" NL, dev->page); if(unlikely(!dev->page)) { return -1; } dev->rx_desc[slot].data_ptr = dma_map_page(&dev->ofdev->dev, dev->page, 0, 4096, DMA_FROM_DEVICE); wmb(); dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | (slot == (NUM_RX_BUFF - 1) ? 
MAL_RX_CTRL_WRAP : 0); return 0; } /* end of emac_alloc_rx_skb2 */ static inline void emac_consume_page(struct emac_instance* dev, int length, int slot) { dma_unmap_page(&dev->ofdev->dev, dev->rx_desc[slot].data_ptr, 4096, DMA_FROM_DEVICE); wmb(); __free_page(dev->page); dev->page = NULL; dev->rx_sg_skb->len += length; dev->rx_sg_skb->data_len += length; dev->rx_sg_skb->truesize += length; } static inline int emac_rx_sg_append(struct emac_instance *dev, int slot) { if (likely(dev->rx_sg_skb != NULL)) { int len = dev->rx_desc[slot].data_len; int tot_len = dev->rx_sg_skb->len + len; if (unlikely(tot_len + 2 > dev->max_mtu)) { ++dev->estats.rx_dropped_mtu; dev_kfree_skb(dev->rx_sg_skb); dev->rx_sg_skb = NULL; } else { dev->page = alloc_page(GFP_ATOMIC); if(unlikely(!dev->page)) { return -ENOMEM; } dev->rx_desc[slot].data_ptr = dma_map_page(&dev->ofdev->dev, dev->page, 0, 4096, DMA_FROM_DEVICE); dev->rx_desc[slot].data_len = 0; wmb(); dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0); skb_fill_page_desc(dev->rx_sg_skb, skb_shinfo(dev->rx_sg_skb)->nr_frags, dev->page, 0, len); emac_consume_page(dev, len, slot); return 0; } } emac_recycle_rx_skb(dev, slot, 0); return -1; } /* end of emac_rx_sg_append */ _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev