From: Sven Van Asbroeck <thesve...@gmail.com>

On cpu architectures without dma cache snooping, dma_unmap() is a
very expensive operation, because its resulting sync needs to
invalidate cpu caches.

Increase efficiency/performance by syncing only those sections
of the lan743x's rx ring buffers that are actually in use.

Signed-off-by: Sven Van Asbroeck <thesve...@gmail.com>
---

To: Bryan Whitehead <bryan.whiteh...@microchip.com>
To: unglinuxdri...@microchip.com
To: "David S. Miller" <da...@davemloft.net>
To: Jakub Kicinski <k...@kernel.org>
Cc: Andrew Lunn <and...@lunn.ch>
Cc: Alexey Denisov <rtg...@gmail.com>
Cc: Sergej Bauer <sba...@blackbox.su>
Cc: Tim Harvey <thar...@gateworks.com>
Cc: Anders Rønningen <and...@ronningen.priv.no>
Cc: Hillf Danton <hdan...@sina.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Willem de Bruijn <willemdebruijn.ker...@gmail.com>
Cc: net...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

 drivers/net/ethernet/microchip/lan743x_main.c | 35 ++++++++++++++-----
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index c2633efe6067..6b642691a676 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1968,35 +1968,52 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index)
        struct net_device *netdev = rx->adapter->netdev;
        struct device *dev = &rx->adapter->pdev->dev;
        struct lan743x_rx_buffer_info *buffer_info;
+       unsigned int buffer_length, used_length;
        struct lan743x_rx_descriptor *descriptor;
        struct sk_buff *skb;
        dma_addr_t dma_ptr;
-       int length;
 
-       length = netdev->mtu + ETH_HLEN + 4 + RX_HEAD_PADDING;
+       buffer_length = netdev->mtu + ETH_HLEN + 4 + RX_HEAD_PADDING;
 
        descriptor = &rx->ring_cpu_ptr[index];
        buffer_info = &rx->buffer_info[index];
-       skb = __netdev_alloc_skb(netdev, length, GFP_ATOMIC | GFP_DMA);
+       skb = __netdev_alloc_skb(netdev, buffer_length, GFP_ATOMIC | GFP_DMA);
        if (!skb)
                return -ENOMEM;
-       dma_ptr = dma_map_single(dev, skb->data, length, DMA_FROM_DEVICE);
+       dma_ptr = dma_map_single(dev, skb->data, buffer_length, 
DMA_FROM_DEVICE);
        if (dma_mapping_error(dev, dma_ptr)) {
                dev_kfree_skb_any(skb);
                return -ENOMEM;
        }
-       if (buffer_info->dma_ptr)
-               dma_unmap_single(dev, buffer_info->dma_ptr,
-                                buffer_info->buffer_length, DMA_FROM_DEVICE);
+       if (buffer_info->dma_ptr) {
+               /* sync used area of buffer only */
+               if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_LS_)
+                       /* frame length is valid only if LS bit is set.
+                        * it's a safe upper bound for the used area in this
+                        * buffer.
+                        */
+                       used_length = min(RX_DESC_DATA0_FRAME_LENGTH_GET_
+                                         (le32_to_cpu(descriptor->data0)),
+                                         buffer_info->buffer_length);
+               else
+                       used_length = buffer_info->buffer_length;
+               dma_sync_single_for_cpu(dev, buffer_info->dma_ptr,
+                                       used_length,
+                                       DMA_FROM_DEVICE);
+               dma_unmap_single_attrs(dev, buffer_info->dma_ptr,
+                                      buffer_info->buffer_length,
+                                      DMA_FROM_DEVICE,
+                                      DMA_ATTR_SKIP_CPU_SYNC);
+       }
 
        buffer_info->skb = skb;
        buffer_info->dma_ptr = dma_ptr;
-       buffer_info->buffer_length = length;
+       buffer_info->buffer_length = buffer_length;
        descriptor->data1 = cpu_to_le32(DMA_ADDR_LOW32(buffer_info->dma_ptr));
        descriptor->data2 = cpu_to_le32(DMA_ADDR_HIGH32(buffer_info->dma_ptr));
        descriptor->data3 = 0;
        descriptor->data0 = cpu_to_le32((RX_DESC_DATA0_OWN_ |
-                           (length & RX_DESC_DATA0_BUF_LENGTH_MASK_)));
+                           (buffer_length & RX_DESC_DATA0_BUF_LENGTH_MASK_)));
        lan743x_rx_update_tail(rx, index);
 
        return 0;
-- 
2.17.1

Reply via email to