Register and use AVX CPU instructions, when available, to do 256-bit
reads that speed up reading EDC and MC.  Otherwise, fall back to
32-bit reads.  Also align the destination buffer on a 32-byte boundary.

Signed-off-by: Rahul Lakkireddy <rahul.lakkire...@chelsio.com>
Signed-off-by: Ganesh Goudar <ganes...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/Makefile        |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h  |  2 +
 .../net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c   |  7 +-
 .../net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h   |  8 +++
 .../ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c   | 78 ++++++++++++++++++++++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c   |  5 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c |  2 +
 7 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c

diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 0dbaf1b18bac..a0f5239b19d4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -12,3 +12,4 @@ cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
+cxgb4-$(CONFIG_X86) += cudbg_intrinsic_avx.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
index b57acb8dc35b..4269d1621e9a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -25,6 +25,8 @@
 #define MC1_FLAG 4
 #define HMA_FLAG 5
 
+#define CUDBG_MEM_ALIGN 32
+
 #define CUDBG_ENTITY_SIGNATURE 0xCCEDB001
 
 struct cudbg_mbox_log {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
index 0b80512e5c0c..6ed418d90507 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
@@ -34,5 +34,10 @@ unsigned int cudbg_mem_read_def(struct cudbg_init *pdbg_init,
 
 void cudbg_set_intrinsic_callback(struct cudbg_init *pdbg_init)
 {
-       pdbg_init->intrinsic_cb = cudbg_mem_read_def;
+#ifdef CONFIG_X86
+       if (cudbg_intrinsic_avx_supported())
+               pdbg_init->intrinsic_cb = cudbg_mem_read_avx;
+       else
+#endif
+               pdbg_init->intrinsic_cb = cudbg_mem_read_def;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
index 3af0f07311ec..d878c71ef65d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
@@ -21,5 +21,13 @@
 unsigned int cudbg_mem_read_def(struct cudbg_init *pdbg_init,
                                u32 start, u32 offset, u32 size,
                                u32 mem_aperture, u8 *outbuf);
+
+#ifdef CONFIG_X86
+int cudbg_intrinsic_avx_supported(void);
+unsigned int cudbg_mem_read_avx(struct cudbg_init *pdbg_init, u32 start,
+                               u32 offset, u32 size, u32 mem_aperture,
+                               u8 *outbuf);
+#endif
+
 void cudbg_set_intrinsic_callback(struct cudbg_init *pdbg_init);
 #endif /* __CUDBG_INTRINSIC_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c
new file mode 100644
index 000000000000..d5bd4dfef428
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic_avx.c
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (C) 2018 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#include <linux/cpufeature.h>
+#include <asm/fpu/api.h>
+
+#include "cxgb4.h"
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+#include "cudbg_intrinsic.h"
+
+int cudbg_intrinsic_avx_supported(void)
+{
+       int avx_ok = 0; /* stays 0 when built without CONFIG_AS_AVX */
+#ifdef CONFIG_AS_AVX
+       avx_ok = boot_cpu_has(X86_FEATURE_AVX);
+#endif /* CONFIG_AS_AVX */
+       return avx_ok;
+}
+
+/* Alignment in bytes for AVX aligned instructions */
+#define CUDBG_MEM_ALIGN_AVX 32
+
+/* Copy one 32-byte chunk from adap->regs + start + offset to outbuf with an
+ * aligned 256-bit AVX load/store and return 32; any request that cannot take
+ * that path is passed unchanged to cudbg_mem_read_def().
+ */
+unsigned int cudbg_mem_read_avx(struct cudbg_init *pdbg_init, u32 start,
+                               u32 offset, u32 size, u32 mem_aperture,
+                               u8 *outbuf)
+{
+#ifdef CONFIG_AS_AVX
+       u8 *reg_addr = (u8 *)pdbg_init->adap->regs + start + offset;
+       u32 max_read_len = CUDBG_MEM_ALIGN_AVX;
+
+       /* Don't use intrinsic for following cases:
+        * 1. If reading current offset + 256-bits would
+        *    exceed current window aperture.
+        * 2. Source or Destination address is not aligned
+        *    to 256-bits.
+        * 3. There are less than 256-bits left to read.
+        */
+       if (offset + max_read_len > mem_aperture ||
+           PTR_ALIGN(reg_addr, max_read_len) != reg_addr ||
+           PTR_ALIGN(outbuf, max_read_len) != outbuf ||
+           size < max_read_len)
+               return cudbg_mem_read_def(pdbg_init, start, offset, size,
+                                         mem_aperture, outbuf);
+
+       /* One asm statement keeps the ymm0 load/store pair together, and the
+        * "memory" clobber covers all 32 bytes: the u8 operands alone only
+        * tell the compiler about the first byte of each buffer.
+        */
+       kernel_fpu_begin();
+       asm volatile("vmovdqa %1, %%ymm0\n\t"
+                    "vmovdqa %%ymm0, %0"
+                    : "=m" (*outbuf) : "m" (*reg_addr) : "memory");
+       kernel_fpu_end();
+       return max_read_len;
+#else
+       return cudbg_mem_read_def(pdbg_init, start, offset, size, mem_aperture,
+                                 outbuf);
+#endif /* CONFIG_AS_AVX */
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index db1b57a09887..220ba2f60cf7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -428,12 +428,15 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
                                           buf,
                                           &total_size);
 
-       if (flag & CXGB4_ETH_DUMP_MEM)
+       if (flag & CXGB4_ETH_DUMP_MEM) {
+               dbg_buff.offset = roundup(dbg_buff.offset, CUDBG_MEM_ALIGN);
+               total_size = roundup(total_size, CUDBG_MEM_ALIGN);
                cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff,
                                           cxgb4_collect_mem_dump,
                                           ARRAY_SIZE(cxgb4_collect_mem_dump),
                                           buf,
                                           &total_size);
+       }
 
        cudbg_hdr->data_len = total_size;
        *buf_size = total_size;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 7852d98bad75..d437e46f6af6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1362,6 +1362,7 @@ static int set_dump(struct net_device *dev, struct ethtool_dump *eth_dump)
        len = sizeof(struct cudbg_hdr) +
              sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
        len += cxgb4_get_dump_length(adapter, eth_dump->flag);
+       len = roundup(len, CUDBG_MEM_ALIGN);
 
        adapter->eth_dump.flag = eth_dump->flag;
        adapter->eth_dump.len = len;
@@ -1391,6 +1392,7 @@ static int get_dump_data(struct net_device *dev, struct ethtool_dump *eth_dump,
        len = sizeof(struct cudbg_hdr) +
              sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
        len += cxgb4_get_dump_length(adapter, adapter->eth_dump.flag);
+       len = roundup(len, CUDBG_MEM_ALIGN);
        if (eth_dump->len < len)
                return -ENOMEM;
 
-- 
2.14.1

Reply via email to