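Add a callback through which read routines built on CPU intrinsic
instructions can be registered to speed up reading EDC and MC memory.
By default, the callback uses 32-bit reads.  Also rework the logic to
read EDC and MC: the memory window setup, window update and residual
handling are factored out of t4_memory_rw() into helpers so that the
cudbg read path can reuse them.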

Signed-off-by: Rahul Lakkireddy <rahul.lakkire...@chelsio.com>
Signed-off-by: Ganesh Goudar <ganes...@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/Makefile        |   2 +-
 drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h      |   6 +
 .../net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c   |  38 ++++
 .../net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h   |  25 +++
 drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c     |  70 +++++++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |   5 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c   |   2 +
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c         | 193 +++++++++++++--------
 8 files changed, 267 insertions(+), 74 deletions(-)
 create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
 create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h

diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile 
b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 8c9c6b0d2e5d..0dbaf1b18bac 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o 
cxgb4_ethtool.o \
              cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
              cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
-             cudbg_common.o cudbg_lib.o
+             cudbg_common.o cudbg_lib.o cudbg_intrinsic.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h 
b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
index 88e740082a02..456d61eacb27 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
@@ -83,10 +83,16 @@ enum cudbg_dbg_entity_type {
        CUDBG_MAX_ENTITY = 70,
 };
 
+struct cudbg_init;
+typedef unsigned int (*cudbg_intrinsic_t)(struct cudbg_init *pdbg_init,
+                                         u32 start, u32 offset, u32 size,
+                                         u32 max_size, u8 *buf);
+
 struct cudbg_init {
        struct adapter *adap; /* Pointer to adapter structure */
        void *outbuf; /* Output buffer */
        u32 outbuf_size;  /* Output buffer size */
+       cudbg_intrinsic_t intrinsic_cb; /* CPU intrinsic callback */
 };
 
 static inline unsigned int cudbg_mbytes_to_bytes(unsigned int size)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c 
b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
new file mode 100644
index 000000000000..0b80512e5c0c
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.c
@@ -0,0 +1,38 @@
+/*
+ *  Copyright (C) 2018 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#include "cxgb4.h"
+#include "cudbg_if.h"
+#include "cudbg_intrinsic.h"
+
+/* Default read callback: one 32-bit read; @size, @mem_aperture unused. */
+unsigned int cudbg_mem_read_def(struct cudbg_init *pdbg_init,
+                               u32 start, u32 offset, u32 size,
+                               u32 mem_aperture, u8 *outbuf)
+{
+       struct adapter *adap = pdbg_init->adap;
+       u32 *buf = (u32 *)outbuf; /* le32_to_cpu() yields a plain u32 */
+
+       *buf = le32_to_cpu((__force __le32)t4_read_reg(adap, start + offset));
+
+       return sizeof(*buf); /* number of bytes stored in @outbuf */
+}
+
+void cudbg_set_intrinsic_callback(struct cudbg_init *pdbg_init)
+{
+       pdbg_init->intrinsic_cb = cudbg_mem_read_def; /* default: 32-bit reads */
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h 
b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
new file mode 100644
index 000000000000..3af0f07311ec
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_intrinsic.h
@@ -0,0 +1,25 @@
+/*
+ *  Copyright (C) 2018 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#ifndef __CUDBG_INTRINSIC_H__
+#define __CUDBG_INTRINSIC_H__
+
+unsigned int cudbg_mem_read_def(struct cudbg_init *pdbg_init,
+                               u32 start, u32 offset, u32 size,
+                               u32 mem_aperture, u8 *outbuf);
+void cudbg_set_intrinsic_callback(struct cudbg_init *pdbg_init);
+#endif /* __CUDBG_INTRINSIC_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c 
b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 0a3871f10787..4921460aa787 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -18,6 +18,7 @@
 #include <linux/sort.h>
 
 #include "t4_regs.h"
+#include "t4_values.h"
 #include "cxgb4.h"
 #include "cudbg_if.h"
 #include "cudbg_lib_common.h"
@@ -840,6 +841,69 @@ static int cudbg_get_payload_range(struct adapter *padap, 
u8 mem_type,
                                      &payload->start, &payload->end);
 }
 
+/* Read @len bytes of @mtype memory at @addr into @hbuf via window @win.
+ * Whole 32-bit words go through pdbg_init->intrinsic_cb.  Returns 0 or a
+ * negative errno (-EINVAL: misaligned @addr/@hbuf, -EIO: bad callback).
+ */
+static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
+                            int mtype, u32 addr, u32 len, void *hbuf)
+{
+       u32 win_pf, memoffset, mem_aperture, mem_base;
+       struct adapter *adap = pdbg_init->adap;
+       u32 pos, offset, resid, read_len;
+       u8 *buf = hbuf;
+       int ret;
+
+       /* Adapter address and host buffer must both be 4-byte aligned. */
+       if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
+               return -EINVAL;
+
+       /* Try to do 32-bit reads.  Residual will be handled later. */
+       resid = len & 0x3;
+       len -= resid;
+
+       ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+                               &mem_aperture);
+       if (ret)
+               return ret;
+
+       /* Position the window so that it covers the start of the transfer. */
+       addr += memoffset;
+       win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
+       pos = addr & ~(mem_aperture - 1);
+       offset = addr - pos;
+
+       t4_memory_update_win(adap, win, pos | win_pf);
+
+       /* Transfer data from the adapter */
+       while (len > 0) {
+               read_len = pdbg_init->intrinsic_cb(pdbg_init, mem_base,
+                                                  offset, len, mem_aperture,
+                                                  buf);
+               /* 0 would spin forever; more than @len would wrap it. */
+               if (!read_len || read_len > len)
+                       return -EIO;
+
+               buf += read_len;
+               offset += read_len;
+               len -= read_len;
+
+               /* End of the current aperture: slide the window forward. */
+               if (offset == mem_aperture) {
+                       pos += mem_aperture;
+                       offset = 0;
+                       t4_memory_update_win(adap, win, pos | win_pf);
+               }
+       }
+
+       /* Transfer residual */
+       if (resid)
+               t4_memory_rw_residual(adap, resid, mem_base + offset, buf,
+                                     T4_MEMORY_READ);
+
+       return 0;
+}
+
 #define CUDBG_YIELD_ITERATION 256
 
 static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
@@ -899,10 +963,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
                                goto skip_read;
 
                spin_lock(&padap->win0_lock);
-               rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
-                                 bytes_read, bytes,
-                                 (__be32 *)temp_buff.data,
-                                 1);
+               rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
+                                      bytes_read, bytes, temp_buff.data);
                spin_unlock(&padap->win0_lock);
                if (rc) {
                        cudbg_err->sys_err = rc;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index baa67d362051..92f1d87adf9f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1471,6 +1471,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
 u32 t4_get_util_window(struct adapter *adap);
 void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
 
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+                     u32 *mem_base, u32 *mem_aperture);
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+                          int dir);
 #define T4_MEMORY_WRITE        0
 #define T4_MEMORY_READ 1
 int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index a2d6c8a69c52..db1b57a09887 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -18,6 +18,7 @@
 #include "t4_regs.h"
 #include "cxgb4.h"
 #include "cxgb4_cudbg.h"
+#include "cudbg_intrinsic.h"
 
 static const struct cxgb4_collect_entity cxgb4_collect_mem_dump[] = {
        { CUDBG_EDC0, cudbg_collect_edc0_meminfo },
@@ -395,6 +396,7 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, 
u32 *buf_size,
        cudbg_init.adap = adap;
        cudbg_init.outbuf = buf;
        cudbg_init.outbuf_size = size;
+       cudbg_set_intrinsic_callback(&cudbg_init);
 
        dbg_buff.data = buf;
        dbg_buff.size = size;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c 
b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 6d76851a4da9..e4d7da15c3b6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -481,6 +481,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
        return 0;
 }
 
+/**
+ * t4_memory_rw_init - look up a memory window's offset, base and aperture
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1, MEM_MC, MEM_MC1 or MEM_HMA
+ * @mem_off: returns the byte offset to add to addresses within @mtype
+ * @mem_base: returns the window's base address, relative to BAR0
+ * @mem_aperture: returns the window's aperture (size) in bytes
+ *
+ * Returns 0 on success or -ENXIO if the window register reads all-ones.
+ */
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+                     u32 *mem_base, u32 *mem_aperture)
+{
+       u32 edc_mb, mc_mb, win_reg;
+
+       /* Byte offset of the memory region which is being accessed:
+        * MEM_EDC0 = 0
+        * MEM_EDC1 = 1
+        * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
+        * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
+        * MEM_HMA  = 4
+        */
+       edc_mb = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
+       switch (mtype) {
+       case MEM_HMA:
+               *mem_off = 2 * (edc_mb * 1024 * 1024);
+               break;
+       case MEM_MC1:
+               mc_mb = EXT_MEM0_SIZE_G(t4_read_reg(adap,
+                                                   MA_EXT_MEMORY0_BAR_A));
+               *mem_off = (MEM_MC0 * edc_mb + mc_mb) * 1024 * 1024;
+               break;
+       default:
+               *mem_off = mtype * (edc_mb * 1024 * 1024);
+               break;
+       }
+
+       /* The window's base register holds both its aperture (the mapping
+        * granularity) and its base address in BAR0's address space: an
+        * absolute PCI-E Bus Address on T4, relative to BAR0 on T5.
+        */
+       win_reg = t4_read_reg(adap,
+                             PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
+                                                 win));
+       /* a dead adapter will return 0xffffffff for PIO reads */
+       if (win_reg == 0xffffffff)
+               return -ENXIO;
+
+       *mem_aperture = 1 << (WINDOW_G(win_reg) + WINDOW_SHIFT_X);
+       *mem_base = PCIEOFST_G(win_reg) << PCIEOFST_SHIFT_X;
+       if (is_t4(adap->params.chip))
+               *mem_base -= adap->t4_bar0;
+
+       return 0;
+}
+
+/**
+ * t4_memory_update_win - reposition a PCI-E Memory Window
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to move
+ * @addr: value to program into the window's offset register
+ *
+ * Writes @addr to the offset register of @win and reads it back.
+ */
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
+{
+       u32 offset_reg = PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win);
+
+       t4_write_reg(adap, offset_reg, addr);
+
+       /* Reading the register back makes sure the new position has
+        * propagated before anyone accesses memory through the window.
+        */
+       t4_read_reg(adap, offset_reg);
+}
+
+/**
+ * t4_memory_rw_residual - Read/Write residual data.
+ * @adap: the adapter
+ * @off: residual length; byte index where read copy / write padding starts
+ * @addr: register address of the 32-bit word holding the residual
+ * @buf: host memory buffer
+ * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
+ *
+ * Read/Write residual data less than 32-bits; writes send @buf[0..@off) + 0s.
+ */
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+                          int dir)
+{
+       union {
+               u32 word;
+               u8 byte[4];
+       } last;
+       u32 i;
+
+       if (dir == T4_MEMORY_READ) {
+               last.word = le32_to_cpu((__force __le32)
+                                       t4_read_reg(adap, addr));
+               /* NOTE(review): fills bytes [off, 4) as before; confirm. */
+               for (i = off; i < 4; i++)
+                       buf[i] = last.byte[i];
+       } else {
+               /* Gather the @off residual bytes; the rest of the word is 0. */
+               last.word = 0;
+               for (i = 0; i < off && i < 4; i++)
+                       last.byte[i] = buf[i];
+               t4_write_reg(adap, addr, (__force u32)cpu_to_le32(last.word));
+       }
+}
+
 /**
  *     t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
  *     @adap: the adapter
@@ -502,8 +613,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, 
u32 addr,
                 u32 len, void *hbuf, int dir)
 {
        u32 pos, offset, resid, memoffset;
-       u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base;
+       u32 win_pf, mem_aperture, mem_base;
        u32 *buf;
+       int ret;
 
        /* Argument sanity checks ...
         */
@@ -519,59 +631,26 @@ int t4_memory_rw(struct adapter *adap, int win, int 
mtype, u32 addr,
        resid = len & 0x3;
        len -= resid;
 
-       /* Offset into the region of memory which is being accessed
-        * MEM_EDC0 = 0
-        * MEM_EDC1 = 1
-        * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
-        * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
-        * MEM_HMA  = 4
-        */
-       edc_size  = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
-       if (mtype == MEM_HMA) {
-               memoffset = 2 * (edc_size * 1024 * 1024);
-       } else if (mtype != MEM_MC1) {
-               memoffset = (mtype * (edc_size * 1024 * 1024));
-       } else {
-               mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
-                                                     MA_EXT_MEMORY0_BAR_A));
-               memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
-       }
+       ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+                               &mem_aperture);
+       if (ret)
+               return ret;
 
        /* Determine the PCIE_MEM_ACCESS_OFFSET */
        addr = addr + memoffset;
 
-       /* Each PCI-E Memory Window is programmed with a window size -- or
-        * "aperture" -- which controls the granularity of its mapping onto
-        * adapter memory.  We need to grab that aperture in order to know
-        * how to use the specified window.  The window is also programmed
-        * with the base address of the Memory Window in BAR0's address
-        * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
-        * the address is relative to BAR0.
-        */
-       mem_reg = t4_read_reg(adap,
-                             PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
-                                                 win));
-       mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
-       mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
-       if (is_t4(adap->params.chip))
-               mem_base -= adap->t4_bar0;
        win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
 
        /* Calculate our initial PCI-E Memory Window Position and Offset into
         * that Window.
         */
-       pos = addr & ~(mem_aperture-1);
+       pos = addr & ~(mem_aperture - 1);
        offset = addr - pos;
 
        /* Set up initial PCI-E Memory Window to cover the start of our
-        * transfer.  (Read it back to ensure that changes propagate before we
-        * attempt to use the new value.)
+        * transfer.
         */
-       t4_write_reg(adap,
-                    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
-                    pos | win_pf);
-       t4_read_reg(adap,
-                   PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+       t4_memory_update_win(adap, win, pos | win_pf);
 
        /* Transfer data to/from the adapter as long as there's an integral
         * number of 32-bit transfers to complete.
@@ -626,12 +705,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, 
u32 addr,
                if (offset == mem_aperture) {
                        pos += mem_aperture;
                        offset = 0;
-                       t4_write_reg(adap,
-                               PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
-                                                   win), pos | win_pf);
-                       t4_read_reg(adap,
-                               PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
-                                                   win));
+                       t4_memory_update_win(adap, win, pos | win_pf);
                }
        }
 
@@ -640,28 +714,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, 
u32 addr,
         * residual amount.  The PCI-E Memory Window has already been moved
         * above (if necessary) to cover this final transfer.
         */
-       if (resid) {
-               union {
-                       u32 word;
-                       char byte[4];
-               } last;
-               unsigned char *bp;
-               int i;
-
-               if (dir == T4_MEMORY_READ) {
-                       last.word = le32_to_cpu(
-                                       (__force __le32)t4_read_reg(adap,
-                                               mem_base + offset));
-                       for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
-                               bp[i] = last.byte[i];
-               } else {
-                       last.word = *buf;
-                       for (i = resid; i < 4; i++)
-                               last.byte[i] = 0;
-                       t4_write_reg(adap, mem_base + offset,
-                                    (__force u32)cpu_to_le32(last.word));
-               }
-       }
+       if (resid)
+               t4_memory_rw_residual(adap, resid, mem_base + offset,
+                                     (u8 *)buf, dir);
 
        return 0;
 }
-- 
2.14.1

Reply via email to