NB: I thought I'd have a close look at this since I thought I
understand NAPI pretty well, but using NAPI to transmit frames as well
as with a usb device has got me pretty confused. Also, I suspect that
you didn't try compiling this against the net-next kernel.

I'm stopping my review only partially completed, please address issues
https://patchwork.kernel.org/project/netdevbpf/patch/20210204113121.29786-2-john.efstathia...@pebblebay.com/

It might make it easier for reviewers to split the "infrastructure"
refactors this patch uses into separate pieces. I know it is more work
and this is tested already by you, but this is a pretty complicated
chunk of code to review.

John Efstathiades wrote:

> Improve driver throughput and reduce CPU overhead by using the NAPI
> interface for processing Rx packets and scheduling Tx and Rx URBs.
> 
> Provide statically-allocated URB and buffer pool for both Tx and Rx
> packets to give greater control over resource allocation.
> 
> Remove modification of hard_header_len that prevents correct operation
> of generic receive offload (GRO) handling of TCP connections.
> 
> Signed-off-by: John Efstathiades <john.efstathia...@pebblebay.com>
> ---
>  drivers/net/usb/lan78xx.c | 1176 ++++++++++++++++++++++++-------------
>  1 file changed, 775 insertions(+), 401 deletions(-)
> 
> diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
> index e81c5699c952..1c872edc816a 100644
> --- a/drivers/net/usb/lan78xx.c
> +++ b/drivers/net/usb/lan78xx.c
> @@ -47,17 +47,17 @@
>  
>  #define MAX_RX_FIFO_SIZE             (12 * 1024)
>  #define MAX_TX_FIFO_SIZE             (12 * 1024)
> -#define DEFAULT_BURST_CAP_SIZE               (MAX_TX_FIFO_SIZE)
> -#define DEFAULT_BULK_IN_DELAY                (0x0800)
>  #define MAX_SINGLE_PACKET_SIZE               (9000)
>  #define DEFAULT_TX_CSUM_ENABLE               (true)
>  #define DEFAULT_RX_CSUM_ENABLE               (true)
>  #define DEFAULT_TSO_CSUM_ENABLE              (true)
>  #define DEFAULT_VLAN_FILTER_ENABLE   (true)
>  #define DEFAULT_VLAN_RX_OFFLOAD              (true)
> -#define TX_OVERHEAD                  (8)
> +#define TX_ALIGNMENT                 (4)
>  #define RXW_PADDING                  2
>  
> +#define MIN_IPV4_DGRAM                       68
> +
>  #define LAN78XX_USB_VENDOR_ID                (0x0424)
>  #define LAN7800_USB_PRODUCT_ID               (0x7800)
>  #define LAN7850_USB_PRODUCT_ID               (0x7850)
> @@ -78,6 +78,44 @@
>                                        WAKE_MCAST | WAKE_BCAST | \
>                                        WAKE_ARP | WAKE_MAGIC)
>  
> +#define LAN78XX_NAPI_WEIGHT          64
> +
> +#define TX_URB_NUM                   10
> +#define TX_SS_URB_NUM                        TX_URB_NUM
> +#define TX_HS_URB_NUM                        TX_URB_NUM
> +#define TX_FS_URB_NUM                        TX_URB_NUM
> +
> +/* A single URB buffer must be large enough to hold a complete jumbo packet
> + */
> +#define TX_SS_URB_SIZE                       (32 * 1024)

wow, 32k per allocation! Only 30 of them I guess.

> +#define TX_HS_URB_SIZE                       (16 * 1024)
> +#define TX_FS_URB_SIZE                       (10 * 1024)
> +
> +#define RX_SS_URB_NUM                        30
> +#define RX_HS_URB_NUM                        10
> +#define RX_FS_URB_NUM                        10
> +#define RX_SS_URB_SIZE                       TX_SS_URB_SIZE
> +#define RX_HS_URB_SIZE                       TX_HS_URB_SIZE
> +#define RX_FS_URB_SIZE                       TX_FS_URB_SIZE
> +
> +#define SS_BURST_CAP_SIZE            RX_SS_URB_SIZE
> +#define SS_BULK_IN_DELAY             0x2000
> +#define HS_BURST_CAP_SIZE            RX_HS_URB_SIZE
> +#define HS_BULK_IN_DELAY             0x2000
> +#define FS_BURST_CAP_SIZE            RX_FS_URB_SIZE
> +#define FS_BULK_IN_DELAY             0x2000
> +
> +#define TX_CMD_LEN                   8
> +#define TX_SKB_MIN_LEN                       (TX_CMD_LEN + ETH_HLEN)
> +#define LAN78XX_TSO_SIZE(dev)                ((dev)->tx_urb_size - 
> TX_SKB_MIN_LEN)
> +
> +#define RX_CMD_LEN                   10
> +#define RX_SKB_MIN_LEN                       (RX_CMD_LEN + ETH_HLEN)
> +#define RX_MAX_FRAME_LEN(mtu)                ((mtu) + ETH_HLEN + VLAN_HLEN)
> +
> +#define LAN78XX_MIN_MTU                      MIN_IPV4_DGRAM
> +#define LAN78XX_MAX_MTU                      MAX_SINGLE_PACKET_SIZE
> +
>  /* USB related defines */
>  #define BULK_IN_PIPE                 1
>  #define BULK_OUT_PIPE                        2
> @@ -366,15 +404,22 @@ struct lan78xx_net {
>       struct usb_interface    *intf;
>       void                    *driver_priv;
>  
> -     int                     rx_qlen;
> -     int                     tx_qlen;
> +     int                     tx_pend_data_len;
> +     int                     n_tx_urbs;
> +     int                     n_rx_urbs;
> +     int                     rx_urb_size;
> +     int                     tx_urb_size;
> +
> +     struct sk_buff_head     rxq_free;
> +     struct sk_buff_head     rxq_overflow;
> +     struct sk_buff_head     rxq_done;
>       struct sk_buff_head     rxq;
> +     struct sk_buff_head     txq_free;
>       struct sk_buff_head     txq;
> -     struct sk_buff_head     done;
> -     struct sk_buff_head     rxq_pause;
>       struct sk_buff_head     txq_pend;
>  
> -     struct tasklet_struct   bh;
> +     struct napi_struct      napi;
> +
>       struct delayed_work     wq;
>  
>       int                     msg_enable;
> @@ -385,16 +430,15 @@ struct lan78xx_net {
>       struct mutex            phy_mutex; /* for phy access */
>       unsigned                pipe_in, pipe_out, pipe_intr;
>  
> -     u32                     hard_mtu;       /* count any extra framing */
> -     size_t                  rx_urb_size;    /* size for rx urbs */
> +     unsigned int            bulk_in_delay;
> +     unsigned int            burst_cap;
>  
>       unsigned long           flags;
>  
>       wait_queue_head_t       *wait;
>       unsigned char           suspend_count;
>  
> -     unsigned                maxpacket;
> -     struct timer_list       delay;
> +     unsigned int            maxpacket;
>       struct timer_list       stat_monitor;
>  
>       unsigned long           data[5];
> @@ -425,6 +469,128 @@ static int msg_level = -1;
>  module_param(msg_level, int, 0);
>  MODULE_PARM_DESC(msg_level, "Override default message level");
>  
> +static inline struct sk_buff *lan78xx_get_buf(struct sk_buff_head *buf_pool)
> +{
> +     if (!skb_queue_empty(buf_pool))
> +             return skb_dequeue(buf_pool);
> +     else
> +             return NULL;
> +}
> +
> +static inline void lan78xx_free_buf(struct sk_buff_head *buf_pool,
> +                                 struct sk_buff *buf)
> +{
> +     buf->data = buf->head;
> +     skb_reset_tail_pointer(buf);
> +     buf->len = 0;
> +     buf->data_len = 0;
> +
> +     skb_queue_tail(buf_pool, buf);
> +}
> +
> +static void lan78xx_free_buf_pool(struct sk_buff_head *buf_pool)
> +{
> +     struct sk_buff *buf;
> +     struct skb_data *entry;
> +
> +     while (!skb_queue_empty(buf_pool)) {
> +             buf = skb_dequeue(buf_pool);
> +             if (buf) {
> +                     entry = (struct skb_data *)buf->cb;
> +                     usb_free_urb(entry->urb);
> +                     dev_kfree_skb_any(buf);
> +             }
> +     }
> +}
> +
> +static int lan78xx_alloc_buf_pool(struct sk_buff_head *buf_pool,
> +                               int n_urbs, int urb_size,
> +                               struct lan78xx_net *dev)
> +{
> +     int i;
> +     struct sk_buff *buf;
> +     struct skb_data *entry;
> +     struct urb *urb;
> +
> +     skb_queue_head_init(buf_pool);
> +
> +     for (i = 0; i < n_urbs; i++) {
> +             buf = alloc_skb(urb_size, GFP_ATOMIC);
> +             if (!buf)
> +                     goto error;
> +
> +             if (skb_linearize(buf) != 0) {
> +                     dev_kfree_skb_any(buf);
> +                     goto error;
> +             }

Why did you need to do the linearize? The alloc_skb should never give
you back a fragmented data area. You're only paying the linearize cost
during pool create, so that's good, but I still wonder why it's
necessary?

> +
> +             urb = usb_alloc_urb(0, GFP_ATOMIC);
> +             if (!urb) {
> +                     dev_kfree_skb_any(buf);
> +                     goto error;
> +             }
> +
> +             entry = (struct skb_data *)buf->cb;
> +             entry->urb = urb;
> +             entry->dev = dev;
> +             entry->length = 0;
> +             entry->num_of_packet = 0;
> +
> +             skb_queue_tail(buf_pool, buf);
> +     }
> +
> +     return 0;
> +
> +error:
> +     lan78xx_free_buf_pool(buf_pool);
> +
> +     return -ENOMEM;
> +}
> +
> +static inline struct sk_buff *lan78xx_get_rx_buf(struct lan78xx_net *dev)
> +{
> +     return lan78xx_get_buf(&dev->rxq_free);
> +}
> +
> +static inline void lan78xx_free_rx_buf(struct lan78xx_net *dev,
> +                                    struct sk_buff *rx_buf)
> +{
> +     lan78xx_free_buf(&dev->rxq_free, rx_buf);
> +}
> +
> +static void lan78xx_free_rx_resources(struct lan78xx_net *dev)
> +{
> +     lan78xx_free_buf_pool(&dev->rxq_free);
> +}
> +
> +static int lan78xx_alloc_rx_resources(struct lan78xx_net *dev)
> +{
> +     return lan78xx_alloc_buf_pool(&dev->rxq_free,
> +                                   dev->n_rx_urbs, dev->rx_urb_size, dev);
> +}
> +
> +static inline struct sk_buff *lan78xx_get_tx_buf(struct lan78xx_net *dev)
> +{
> +     return lan78xx_get_buf(&dev->txq_free);
> +}
> +
> +static inline void lan78xx_free_tx_buf(struct lan78xx_net *dev,
> +                                    struct sk_buff *tx_buf)
> +{
> +     lan78xx_free_buf(&dev->txq_free, tx_buf);
> +}
> +
> +static void lan78xx_free_tx_resources(struct lan78xx_net *dev)
> +{
> +     lan78xx_free_buf_pool(&dev->txq_free);
> +}
> +
> +static int lan78xx_alloc_tx_resources(struct lan78xx_net *dev)
> +{
> +     return lan78xx_alloc_buf_pool(&dev->txq_free,
> +                                   dev->n_tx_urbs, dev->tx_urb_size, dev);
> +}
> +
>  static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data)
>  {
>       u32 *buf = kmalloc(sizeof(u32), GFP_KERNEL);
> @@ -1135,7 +1301,7 @@ static int lan78xx_update_flowcontrol(struct 
> lan78xx_net *dev, u8 duplex,
>               flow |= FLOW_CR_RX_FCEN_;
>  
>       if (dev->udev->speed == USB_SPEED_SUPER)
> -             fct_flow = 0x817;
> +             fct_flow = 0x812;

These kind of unexplained changes of magic numbers in the middle of a
NAPI patch make me nervous.


>       else if (dev->udev->speed == USB_SPEED_HIGH)
>               fct_flow = 0x211;
>  
> @@ -1151,6 +1317,8 @@ static int lan78xx_update_flowcontrol(struct 
> lan78xx_net *dev, u8 duplex,
>       return 0;
>  }
>  
> +static void lan78xx_rx_urb_submit_all(struct lan78xx_net *dev);
> +
>  static int lan78xx_link_reset(struct lan78xx_net *dev)
>  {
>       struct phy_device *phydev = dev->net->phydev;
> @@ -1223,7 +1391,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
>                                 jiffies + STAT_UPDATE_TIMER);
>               }
>  
> -             tasklet_schedule(&dev->bh);
> +             lan78xx_rx_urb_submit_all(dev);
> +
> +             napi_schedule(&dev->napi);
>       }
>  
>       return ret;
> @@ -2196,7 +2366,8 @@ static int lan78xx_set_rx_max_frame_length(struct 
> lan78xx_net *dev, int size)
>  
>       /* add 4 to size for FCS */
>       buf &= ~MAC_RX_MAX_SIZE_MASK_;
> -     buf |= (((size + 4) << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_);
> +     buf |= (((size + ETH_FCS_LEN) << MAC_RX_MAX_SIZE_SHIFT_) &
> +             MAC_RX_MAX_SIZE_MASK_);

comment above no longer applies?

>  
>       lan78xx_write_reg(dev, MAC_RX, buf);
>  
> @@ -2256,28 +2427,26 @@ static int unlink_urbs(struct lan78xx_net *dev, 
> struct sk_buff_head *q)
>  static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
>  {
>       struct lan78xx_net *dev = netdev_priv(netdev);
> -     int ll_mtu = new_mtu + netdev->hard_header_len;
> -     int old_hard_mtu = dev->hard_mtu;
> -     int old_rx_urb_size = dev->rx_urb_size;
> +     int max_frame_len = RX_MAX_FRAME_LEN(new_mtu);
> +     int ret;
> +
> +     if (new_mtu < LAN78XX_MIN_MTU ||
> +         new_mtu > LAN78XX_MAX_MTU)
> +             return -EINVAL;
>  
>       /* no second zero-length packet read wanted after mtu-sized packets */
> -     if ((ll_mtu % dev->maxpacket) == 0)
> +     if ((max_frame_len % dev->maxpacket) == 0)
>               return -EDOM;
>  
> -     lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
> +     ret = usb_autopm_get_interface(dev->intf);
> +     if (ret < 0)
> +             return ret;
> +
> +     ret = lan78xx_set_rx_max_frame_length(dev, max_frame_len);
>  
>       netdev->mtu = new_mtu;
>  

Shouldn't this just be using the new min_mtu/max_mtu stuff? These kind
of changes should have been in a separate patch, they have nothing to
do with NAPI conversion...

> -     dev->hard_mtu = netdev->mtu + netdev->hard_header_len;
> -     if (dev->rx_urb_size == old_hard_mtu) {
> -             dev->rx_urb_size = dev->hard_mtu;
> -             if (dev->rx_urb_size > old_rx_urb_size) {
> -                     if (netif_running(dev->net)) {
> -                             unlink_urbs(dev, &dev->rxq);
> -                             tasklet_schedule(&dev->bh);
> -                     }
> -             }
> -     }
> +     usb_autopm_put_interface(dev->intf);
>  
>       return 0;
>  }
> @@ -2435,6 +2604,44 @@ static void lan78xx_init_ltm(struct lan78xx_net *dev)
>       lan78xx_write_reg(dev, LTM_INACTIVE1, regs[5]);
>  }
>  
> +static int lan78xx_urb_config_init(struct lan78xx_net *dev)
> +{
> +     int result = 0;
> +
> +     switch (dev->udev->speed) {
> +     case USB_SPEED_SUPER:
> +             dev->rx_urb_size = RX_SS_URB_SIZE;
> +             dev->tx_urb_size = TX_SS_URB_SIZE;
> +             dev->n_rx_urbs = RX_SS_URB_NUM;
> +             dev->n_tx_urbs = TX_SS_URB_NUM;
> +             dev->bulk_in_delay = SS_BULK_IN_DELAY;
> +             dev->burst_cap = SS_BURST_CAP_SIZE / SS_USB_PKT_SIZE;
> +             break;
> +     case USB_SPEED_HIGH:
> +             dev->rx_urb_size = RX_HS_URB_SIZE;
> +             dev->tx_urb_size = TX_HS_URB_SIZE;
> +             dev->n_rx_urbs = RX_HS_URB_NUM;
> +             dev->n_tx_urbs = TX_HS_URB_NUM;
> +             dev->bulk_in_delay = HS_BULK_IN_DELAY;
> +             dev->burst_cap = HS_BURST_CAP_SIZE / HS_USB_PKT_SIZE;
> +             break;
> +     case USB_SPEED_FULL:
> +             dev->rx_urb_size = RX_FS_URB_SIZE;
> +             dev->tx_urb_size = TX_FS_URB_SIZE;
> +             dev->n_rx_urbs = RX_FS_URB_NUM;
> +             dev->n_tx_urbs = TX_FS_URB_NUM;
> +             dev->bulk_in_delay = FS_BULK_IN_DELAY;
> +             dev->burst_cap = FS_BURST_CAP_SIZE / FS_USB_PKT_SIZE;
> +             break;
> +     default:
> +             netdev_warn(dev->net, "USB bus speed not supported\n");
> +             result = -EIO;
> +             break;
> +     }
> +
> +     return result;
> +}
> +
>  static int lan78xx_reset(struct lan78xx_net *dev)
>  {
>       struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
> @@ -2473,25 +2680,8 @@ static int lan78xx_reset(struct lan78xx_net *dev)
>       /* Init LTM */
>       lan78xx_init_ltm(dev);
>  
> -     if (dev->udev->speed == USB_SPEED_SUPER) {
> -             buf = DEFAULT_BURST_CAP_SIZE / SS_USB_PKT_SIZE;
> -             dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE;
> -             dev->rx_qlen = 4;
> -             dev->tx_qlen = 4;
> -     } else if (dev->udev->speed == USB_SPEED_HIGH) {
> -             buf = DEFAULT_BURST_CAP_SIZE / HS_USB_PKT_SIZE;
> -             dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE;
> -             dev->rx_qlen = RX_MAX_QUEUE_MEMORY / dev->rx_urb_size;
> -             dev->tx_qlen = RX_MAX_QUEUE_MEMORY / dev->hard_mtu;
> -     } else {
> -             buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE;
> -             dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE;
> -             dev->rx_qlen = 4;
> -             dev->tx_qlen = 4;
> -     }
> -
> -     ret = lan78xx_write_reg(dev, BURST_CAP, buf);
> -     ret = lan78xx_write_reg(dev, BULK_IN_DLY, DEFAULT_BULK_IN_DELAY);
> +     ret = lan78xx_write_reg(dev, BURST_CAP, dev->burst_cap);
> +     ret = lan78xx_write_reg(dev, BULK_IN_DLY, dev->bulk_in_delay);
>  
>       ret = lan78xx_read_reg(dev, HW_CFG, &buf);
>       buf |= HW_CFG_MEF_;
> @@ -2501,6 +2691,8 @@ static int lan78xx_reset(struct lan78xx_net *dev)
>       buf |= USB_CFG_BCE_;
>       ret = lan78xx_write_reg(dev, USB_CFG0, buf);
>  
> +     netdev_info(dev->net, "USB_CFG0 0x%08x\n", buf);
> +
>       /* set FIFO sizes */
>       buf = (MAX_RX_FIFO_SIZE - 512) / 512;
>       ret = lan78xx_write_reg(dev, FCT_RX_FIFO_END, buf);
> @@ -2561,7 +2753,7 @@ static int lan78xx_reset(struct lan78xx_net *dev)
>       ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf);
>  
>       ret = lan78xx_set_rx_max_frame_length(dev,
> -                                           dev->net->mtu + VLAN_ETH_HLEN);
> +                                           RX_MAX_FRAME_LEN(dev->net->mtu));
>  
>       ret = lan78xx_read_reg(dev, MAC_RX, &buf);
>       buf |= MAC_RX_RXEN_;
> @@ -2600,6 +2792,8 @@ static void lan78xx_init_stats(struct lan78xx_net *dev)
>       set_bit(EVENT_STAT_UPDATE, &dev->flags);
>  }
>  
> +static int rx_submit(struct lan78xx_net *dev, struct sk_buff *rx_buf, gfp_t 
> flags);
> +
>  static int lan78xx_open(struct net_device *net)
>  {
>       struct lan78xx_net *dev = netdev_priv(net);
> @@ -2631,6 +2825,8 @@ static int lan78xx_open(struct net_device *net)
>  
>       dev->link_on = false;
>  
> +     napi_enable(&dev->napi);
> +
>       lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
>  done:
>       usb_autopm_put_interface(dev->intf);
> @@ -2639,11 +2835,14 @@ static int lan78xx_open(struct net_device *net)
>       return ret;
>  }
>  
> +static int lan78x_tx_pend_skb_get(struct lan78xx_net *dev, struct sk_buff 
> **skb);
> +
>  static void lan78xx_terminate_urbs(struct lan78xx_net *dev)
>  {
>       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(unlink_wakeup);
>       DECLARE_WAITQUEUE(wait, current);
>       int temp;
> +     struct sk_buff *skb;
>  
>       /* ensure there are no more active urbs */
>       add_wait_queue(&unlink_wakeup, &wait);
> @@ -2652,17 +2851,26 @@ static void lan78xx_terminate_urbs(struct lan78xx_net 
> *dev)
>       temp = unlink_urbs(dev, &dev->txq) + unlink_urbs(dev, &dev->rxq);
>  
>       /* maybe wait for deletions to finish. */
> -     while (!skb_queue_empty(&dev->rxq) &&
> -            !skb_queue_empty(&dev->txq) &&
> -            !skb_queue_empty(&dev->done)) {
> +     while (!skb_queue_empty(&dev->rxq) ||
> +            !skb_queue_empty(&dev->txq)) {
>               schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
>               set_current_state(TASK_UNINTERRUPTIBLE);
>               netif_dbg(dev, ifdown, dev->net,
> -                       "waited for %d urb completions\n", temp);
> +                       "waited for %d urb completions", temp);
>       }
>       set_current_state(TASK_RUNNING);
>       dev->wait = NULL;
>       remove_wait_queue(&unlink_wakeup, &wait);
> +
> +     /* empty Rx done, Rx overflow and Tx pend queues
> +      */

single line comment doesn't need \n before */

> +     while (!skb_queue_empty(&dev->rxq_done)) {
> +             skb = skb_dequeue(&dev->rxq_done);
> +             lan78xx_free_rx_buf(dev, skb);
> +     }
> +
> +     skb_queue_purge(&dev->rxq_overflow);
> +     skb_queue_purge(&dev->txq_pend);
>  }
>  
>  static int lan78xx_stop(struct net_device *net)
> @@ -2672,78 +2880,34 @@ static int lan78xx_stop(struct net_device *net)
>       if (timer_pending(&dev->stat_monitor))
>               del_timer_sync(&dev->stat_monitor);
>  
> -     if (net->phydev)
> -             phy_stop(net->phydev);
> -
>       clear_bit(EVENT_DEV_OPEN, &dev->flags);
>       netif_stop_queue(net);
> +     napi_disable(&dev->napi);
> +
> +     lan78xx_terminate_urbs(dev);
>  
>       netif_info(dev, ifdown, dev->net,
>                  "stop stats: rx/tx %lu/%lu, errs %lu/%lu\n",
>                  net->stats.rx_packets, net->stats.tx_packets,
>                  net->stats.rx_errors, net->stats.tx_errors);
>  
> -     lan78xx_terminate_urbs(dev);
> +     if (net->phydev)
> +             phy_stop(net->phydev);
>  
>       usb_kill_urb(dev->urb_intr);
>  
> -     skb_queue_purge(&dev->rxq_pause);
> -
>       /* deferred work (task, timer, softirq) must also stop.
>        * can't flush_scheduled_work() until we drop rtnl (later),
>        * else workers could deadlock; so make workers a NOP.
>        */
>       dev->flags = 0;
>       cancel_delayed_work_sync(&dev->wq);
> -     tasklet_kill(&dev->bh);
>  
>       usb_autopm_put_interface(dev->intf);
>  
>       return 0;
>  }
>  
> -static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
> -                                    struct sk_buff *skb, gfp_t flags)
> -{
> -     u32 tx_cmd_a, tx_cmd_b;
> -     void *ptr;
> -
> -     if (skb_cow_head(skb, TX_OVERHEAD)) {
> -             dev_kfree_skb_any(skb);
> -             return NULL;
> -     }
> -
> -     if (skb_linearize(skb)) {
> -             dev_kfree_skb_any(skb);
> -             return NULL;
> -     }
> -
> -     tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN_MASK_) | TX_CMD_A_FCS_;
> -
> -     if (skb->ip_summed == CHECKSUM_PARTIAL)
> -             tx_cmd_a |= TX_CMD_A_IPE_ | TX_CMD_A_TPE_;
> -
> -     tx_cmd_b = 0;
> -     if (skb_is_gso(skb)) {
> -             u16 mss = max(skb_shinfo(skb)->gso_size, TX_CMD_B_MSS_MIN_);
> -
> -             tx_cmd_b = (mss << TX_CMD_B_MSS_SHIFT_) & TX_CMD_B_MSS_MASK_;
> -
> -             tx_cmd_a |= TX_CMD_A_LSO_;
> -     }
> -
> -     if (skb_vlan_tag_present(skb)) {
> -             tx_cmd_a |= TX_CMD_A_IVTG_;
> -             tx_cmd_b |= skb_vlan_tag_get(skb) & TX_CMD_B_VTAG_MASK_;
> -     }
> -
> -     ptr = skb_push(skb, 8);
> -     put_unaligned_le32(tx_cmd_a, ptr);
> -     put_unaligned_le32(tx_cmd_b, ptr + 4);
> -
> -     return skb;
> -}
> -
>  static enum skb_state defer_bh(struct lan78xx_net *dev, struct sk_buff *skb,
>                              struct sk_buff_head *list, enum skb_state state)
>  {
> @@ -2752,17 +2916,21 @@ static enum skb_state defer_bh(struct lan78xx_net 
> *dev, struct sk_buff *skb,
>       struct skb_data *entry = (struct skb_data *)skb->cb;
>  
>       spin_lock_irqsave(&list->lock, flags);
> +
>       old_state = entry->state;
>       entry->state = state;
>  
>       __skb_unlink(skb, list);
> +
>       spin_unlock(&list->lock);
> -     spin_lock(&dev->done.lock);
> +     spin_lock(&dev->rxq_done.lock);
> +
> +     __skb_queue_tail(&dev->rxq_done, skb);
> +
> +     if (skb_queue_len(&dev->rxq_done) == 1)
> +             napi_schedule(&dev->napi);
>  
> -     __skb_queue_tail(&dev->done, skb);
> -     if (skb_queue_len(&dev->done) == 1)
> -             tasklet_schedule(&dev->bh);
> -     spin_unlock_irqrestore(&dev->done.lock, flags);
> +     spin_unlock_irqrestore(&dev->rxq_done.lock, flags);
>  
>       return old_state;
>  }
> @@ -2773,11 +2941,14 @@ static void tx_complete(struct urb *urb)
>       struct skb_data *entry = (struct skb_data *)skb->cb;
>       struct lan78xx_net *dev = entry->dev;
>  
> +     netif_dbg(dev, tx_done, dev->net,
> +               "tx done: status %d\n", urb->status);
> +
>       if (urb->status == 0) {
>               dev->net->stats.tx_packets += entry->num_of_packet;
>               dev->net->stats.tx_bytes += entry->length;
>       } else {
> -             dev->net->stats.tx_errors++;
> +             dev->net->stats.tx_errors += entry->num_of_packet;
>  
>               switch (urb->status) {
>               case -EPIPE:
> @@ -2803,7 +2974,15 @@ static void tx_complete(struct urb *urb)
>  
>       usb_autopm_put_interface_async(dev->intf);
>  
> -     defer_bh(dev, skb, &dev->txq, tx_done);
> +     skb_unlink(skb, &dev->txq);
> +
> +     lan78xx_free_tx_buf(dev, skb);
> +
> +     /* Re-schedule NAPI if Tx data pending but no URBs in progress.
> +      */
> +     if (skb_queue_empty(&dev->txq) &&
> +         !skb_queue_empty(&dev->txq_pend))
> +             napi_schedule(&dev->napi);
>  }
>  
>  static void lan78xx_queue_skb(struct sk_buff_head *list,
> @@ -2815,32 +2994,102 @@ static void lan78xx_queue_skb(struct sk_buff_head 
> *list,
>       entry->state = state;
>  }
>  
> +#define LAN78XX_TX_URB_SPACE(dev) (skb_queue_len(&(dev)->txq_free) * \
> +                                (dev)->tx_urb_size)
> +
> +static int lan78xx_tx_pend_data_len(struct lan78xx_net *dev)
> +{
> +     return dev->tx_pend_data_len;
> +}
> +
> +static int lan78x_tx_pend_skb_add(struct lan78xx_net *dev, struct sk_buff 
> *skb)
> +{
> +     int tx_pend_data_len;
> +     unsigned long flags;
> +
> +     spin_lock_irqsave(&dev->txq_pend.lock, flags);
> +
> +     __skb_queue_tail(&dev->txq_pend, skb);
> +
> +     dev->tx_pend_data_len += skb->len;
> +     tx_pend_data_len = dev->tx_pend_data_len;
> +
> +     spin_unlock_irqrestore(&dev->txq_pend.lock, flags);
> +
> +     return tx_pend_data_len;
> +}
> +
> +static int lan78x_tx_pend_skb_head_add(struct lan78xx_net *dev, struct 
> sk_buff *skb)
> +{
> +     int tx_pend_data_len;
> +     unsigned long flags;
> +
> +     spin_lock_irqsave(&dev->txq_pend.lock, flags);
> +
> +     __skb_queue_head(&dev->txq_pend, skb);
> +
> +     dev->tx_pend_data_len += skb->len;
> +     tx_pend_data_len = dev->tx_pend_data_len;
> +
> +     spin_unlock_irqrestore(&dev->txq_pend.lock, flags);
> +
> +     return tx_pend_data_len;
> +}
> +
> +static int lan78x_tx_pend_skb_get(struct lan78xx_net *dev, struct sk_buff 
> **skb)
> +{
> +     int tx_pend_data_len;
> +     unsigned long flags;
> +
> +     spin_lock_irqsave(&dev->txq_pend.lock, flags);
> +
> +     *skb = __skb_dequeue(&dev->txq_pend);
> +
> +     if (*skb)
> +             dev->tx_pend_data_len -= (*skb)->len;
> +     tx_pend_data_len = dev->tx_pend_data_len;
> +
> +     spin_unlock_irqrestore(&dev->txq_pend.lock, flags);
> +
> +     return tx_pend_data_len;
> +}
> +
>  static netdev_tx_t
>  lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
>  {
> +     int tx_pend_data_len;
> +
>       struct lan78xx_net *dev = netdev_priv(net);
> -     struct sk_buff *skb2 = NULL;
>  
> -     if (skb) {
> -             skb_tx_timestamp(skb);
> -             skb2 = lan78xx_tx_prep(dev, skb, GFP_ATOMIC);
> -     }
> +     /* Get the deferred work handler to resume the
> +      * device if it's suspended.
> +      */
> +     if (test_bit(EVENT_DEV_ASLEEP, &dev->flags))
> +             schedule_delayed_work(&dev->wq, 0);
>  
> -     if (skb2) {
> -             skb_queue_tail(&dev->txq_pend, skb2);
> +     skb_tx_timestamp(skb);
>  
> -             /* throttle TX patch at slower than SUPER SPEED USB */
> -             if ((dev->udev->speed < USB_SPEED_SUPER) &&
> -                 (skb_queue_len(&dev->txq_pend) > 10))
> -                     netif_stop_queue(net);
> -     } else {
> -             netif_dbg(dev, tx_err, dev->net,
> -                       "lan78xx_tx_prep return NULL\n");
> -             dev->net->stats.tx_errors++;
> -             dev->net->stats.tx_dropped++;
> -     }
> +     tx_pend_data_len = lan78x_tx_pend_skb_add(dev, skb);
> +
> +     /* Set up a Tx URB if none is in progress.
> +      */
> +     if (skb_queue_empty(&dev->txq))
> +             napi_schedule(&dev->napi);
> +
> +     /* Stop stack Tx queue if we have enough data to fill
> +      * all the free Tx URBs.
> +      */
> +     if (tx_pend_data_len > LAN78XX_TX_URB_SPACE(dev)) {
> +             netif_stop_queue(net);
>  
> -     tasklet_schedule(&dev->bh);
> +             netif_dbg(dev, hw, dev->net, "tx data len: %d, urb space %d\n",
> +                       tx_pend_data_len, LAN78XX_TX_URB_SPACE(dev));
> +
> +             /* Kick off transmission of pending data */
> +
> +             if (!skb_queue_empty(&dev->txq_free))
> +                     napi_schedule(&dev->napi);
> +     }
>  
>       return NETDEV_TX_OK;
>  }
> @@ -2897,9 +3146,6 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct 
> usb_interface *intf)
>               goto out1;
>       }
>  
> -     dev->net->hard_header_len += TX_OVERHEAD;
> -     dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
> -
>       /* Init all registers */
>       ret = lan78xx_reset(dev);
>       if (ret) {
> @@ -2978,12 +3224,7 @@ static void lan78xx_rx_vlan_offload(struct lan78xx_net 
> *dev,
>  
>  static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
>  {
> -     int status;
> -
> -     if (test_bit(EVENT_RX_PAUSED, &dev->flags)) {
> -             skb_queue_tail(&dev->rxq_pause, skb);
> -             return;
> -     }
> +     gro_result_t gro_result;
>  
>       dev->net->stats.rx_packets++;
>       dev->net->stats.rx_bytes += skb->len;
> @@ -2997,21 +3238,24 @@ static void lan78xx_skb_return(struct lan78xx_net 
> *dev, struct sk_buff *skb)
>       if (skb_defer_rx_timestamp(skb))
>               return;
>  
> -     status = netif_rx(skb);
> -     if (status != NET_RX_SUCCESS)
> -             netif_dbg(dev, rx_err, dev->net,
> -                       "netif_rx status %d\n", status);
> +     gro_result = napi_gro_receive(&dev->napi, skb);
> +
> +     if (gro_result == GRO_DROP)
> +             netif_dbg(dev, rx_err, dev->net, "GRO packet dropped\n");
>  }

GRO_DROP no longer exists, what kernel did you compile test this on?

Reply via email to