On Mon, 19 Aug 2019 18:31:33 +0200, Thomas Bogendoerfer wrote:
> Buffers allocated by alloc_skb() are already cache aligned so there
> is no need for an extra align done by ioc3_alloc_skb. And instead
> of skb_put/skb_trim simply use one skb_put after frame size is known
> during receive.
> 
> Signed-off-by: Thomas Bogendoerfer <tbogendoer...@suse.de>
> ---
>  drivers/net/ethernet/sgi/ioc3-eth.c | 50 
> ++++++++-----------------------------
>  1 file changed, 11 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c 
> b/drivers/net/ethernet/sgi/ioc3-eth.c
> index c875640926d6..d862f28887f9 100644
> --- a/drivers/net/ethernet/sgi/ioc3-eth.c
> +++ b/drivers/net/ethernet/sgi/ioc3-eth.c
> @@ -11,7 +11,6 @@
>   *
>   * To do:
>   *
> - *  o Handle allocation failures in ioc3_alloc_skb() more gracefully.
>   *  o Handle allocation failures in ioc3_init_rings().
>   *  o Use prefetching for large packets.  What is a good lower limit for
>   *    prefetching?
> @@ -72,6 +71,12 @@
>  #define TX_RING_ENTRIES              128
>  #define TX_RING_MASK         (TX_RING_ENTRIES - 1)
>  
> +/* BEWARE: The IOC3 documentation documents the size of rx buffers as
> + * 1644 while it's actually 1664.  This one was nasty to track down...
> + */
> +#define RX_OFFSET            10
> +#define RX_BUF_SIZE          1664
> +
>  #define ETCSR_FD   ((17 << ETCSR_IPGR2_SHIFT) | (11 << ETCSR_IPGR1_SHIFT) | 
> 21)
>  #define ETCSR_HD   ((21 << ETCSR_IPGR2_SHIFT) | (21 << ETCSR_IPGR1_SHIFT) | 
> 21)
>  
> @@ -111,31 +116,6 @@ static void ioc3_init(struct net_device *dev);
>  static const char ioc3_str[] = "IOC3 Ethernet";
>  static const struct ethtool_ops ioc3_ethtool_ops;
>  
> -/* We use this to acquire receive skb's that we can DMA directly into. */
> -
> -#define IOC3_CACHELINE       128UL

Is the cache line size 128 bytes on the platform this driver works on?
This looks like a DMA engine alignment requirement rather than an
optimization.

The comment in __alloc_skb() says:

        /* We do our best to align skb_shared_info on a separate cache
         * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
         * aligned memory blocks, unless SLUB/SLAB debug is enabled.
         * Both skb->head and skb_shared_info are cache line aligned.
         */

note the "unless".

> -static inline unsigned long aligned_rx_skb_addr(unsigned long addr)
> -{
> -     return (~addr + 1) & (IOC3_CACHELINE - 1UL);
> -}
> -
> -static inline struct sk_buff *ioc3_alloc_skb(unsigned long length,
> -                                          unsigned int gfp_mask)
> -{
> -     struct sk_buff *skb;
> -
> -     skb = alloc_skb(length + IOC3_CACHELINE - 1, gfp_mask);
> -     if (likely(skb)) {
> -             int offset = aligned_rx_skb_addr((unsigned long)skb->data);
> -
> -             if (offset)
> -                     skb_reserve(skb, offset);
> -     }
> -
> -     return skb;
> -}
> -
>  static inline unsigned long ioc3_map(void *ptr, unsigned long vdev)
>  {
>  #ifdef CONFIG_SGI_IP27
> @@ -148,12 +128,6 @@ static inline unsigned long ioc3_map(void *ptr, unsigned 
> long vdev)
>  #endif
>  }
>  
> -/* BEWARE: The IOC3 documentation documents the size of rx buffers as
> - * 1644 while it's actually 1664.  This one was nasty to track down ...
> - */
> -#define RX_OFFSET            10
> -#define RX_BUF_ALLOC_SIZE    (1664 + RX_OFFSET + IOC3_CACHELINE)
> -
>  #define IOC3_SIZE 0x100000
>  
>  static inline u32 mcr_pack(u32 pulse, u32 sample)
> @@ -534,10 +508,10 @@ static inline void ioc3_rx(struct net_device *dev)
>               err = be32_to_cpu(rxb->err);            /* It's valid ...  */
>               if (err & ERXBUF_GOODPKT) {
>                       len = ((w0 >> ERXBUF_BYTECNT_SHIFT) & 0x7ff) - 4;
> -                     skb_trim(skb, len);
> +                     skb_put(skb, len);
>                       skb->protocol = eth_type_trans(skb, dev);
>  
> -                     new_skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
> +                     new_skb = alloc_skb(RX_BUF_SIZE, GFP_ATOMIC);
>                       if (!new_skb) {
>                               /* Ouch, drop packet and just recycle packet
>                                * to keep the ring filled.
> @@ -546,6 +520,7 @@ static inline void ioc3_rx(struct net_device *dev)
>                               new_skb = skb;
>                               goto next;
>                       }
> +                     new_skb->dev = dev;

Assigning the dev pointer seems unrelated to the rest of the patch?

>                       if (likely(dev->features & NETIF_F_RXCSUM))
>                               ioc3_tcpudp_checksum(skb,
> @@ -556,8 +531,6 @@ static inline void ioc3_rx(struct net_device *dev)
>  
>                       ip->rx_skbs[rx_entry] = NULL;   /* Poison  */
>  
> -                     /* Because we reserve afterwards. */
> -                     skb_put(new_skb, (1664 + RX_OFFSET));
>                       rxb = (struct ioc3_erxbuf *)new_skb->data;
>                       skb_reserve(new_skb, RX_OFFSET);
>  
> @@ -846,16 +819,15 @@ static void ioc3_alloc_rings(struct net_device *dev)
>               for (i = 0; i < RX_BUFFS; i++) {
>                       struct sk_buff *skb;
>  
> -                     skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
> +                     skb = alloc_skb(RX_BUF_SIZE, GFP_ATOMIC);
>                       if (!skb) {
>                               show_free_areas(0, NULL);
>                               continue;
>                       }
> +                     skb->dev = dev;
>  
>                       ip->rx_skbs[i] = skb;
>  
> -                     /* Because we reserve afterwards. */
> -                     skb_put(skb, (1664 + RX_OFFSET));
>                       rxb = (struct ioc3_erxbuf *)skb->data;
>                       rxr[i] = cpu_to_be64(ioc3_map(rxb, 1));
>                       skb_reserve(skb, RX_OFFSET);

Reply via email to