From: Harish Kasiviswanathan <harish.kasiviswanat...@amd.com>

Set the per-process static sh_mem config only once, during process
initialization. Move all static settings out of update_qpd(), which is
called each time a queue is created, and into set_cache_memory_policy(),
which is called once during process initialization.

set_cache_memory_policy() is currently defined only for the cik and vi
families, so this commit covers only those two. A separate commit will
address the other ASICs.

Signed-off-by: Harish Kasiviswanathan <harish.kasiviswanat...@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 39 +---------
 .../amd/amdkfd/kfd_device_queue_manager_cik.c | 69 ++++++++++++------
 .../amd/amdkfd/kfd_device_queue_manager_vi.c  | 71 ++++++++++++-------
 3 files changed, 94 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f3f2fd6ee65c..d23c6a358d34 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2591,14 +2591,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
        return retval;
 }
 
-/*
- * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
- * stay in user mode.
- */
-#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
-/* APE1 limit is inclusive and 64K aligned. */
-#define APE1_LIMIT_ALIGNMENT 0xFFFF
-
 static bool set_cache_memory_policy(struct device_queue_manager *dqm,
                                   struct qcm_process_device *qpd,
                                   enum cache_policy default_policy,
@@ -2613,34 +2605,6 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
 
        dqm_lock(dqm);
 
-       if (alternate_aperture_size == 0) {
-               /* base > limit disables APE1 */
-               qpd->sh_mem_ape1_base = 1;
-               qpd->sh_mem_ape1_limit = 0;
-       } else {
-               /*
-                * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
-                *                      SH_MEM_APE1_BASE[31:0], 0x0000 }
-                * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
-                *                      SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
-                * Verify that the base and size parameters can be
-                * represented in this format and convert them.
-                * Additionally restrict APE1 to user-mode addresses.
-                */
-
-               uint64_t base = (uintptr_t)alternate_aperture_base;
-               uint64_t limit = base + alternate_aperture_size - 1;
-
-               if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
-                  (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
-                       retval = false;
-                       goto out;
-               }
-
-               qpd->sh_mem_ape1_base = base >> 16;
-               qpd->sh_mem_ape1_limit = limit >> 16;
-       }
-
        retval = dqm->asic_ops.set_cache_memory_policy(
                        dqm,
                        qpd,
@@ -2649,6 +2613,9 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
                        alternate_aperture_base,
                        alternate_aperture_size);
 
+       if (!retval) /* hook returns false when the policy is rejected */
+               goto out; /* do not program sh_mem registers on failure */
+
        if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
                program_sh_mem_settings(dqm, qpd);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index d4d95c7f2e5d..32bedef912b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -27,6 +27,14 @@
 #include "oss/oss_2_4_sh_mask.h"
 #include "gca/gfx_7_2_sh_mask.h"
 
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
 static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
                                   struct qcm_process_device *qpd,
                                   enum cache_policy default_policy,
@@ -84,6 +92,36 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 {
        uint32_t default_mtype;
        uint32_t ape1_mtype;
+       unsigned int temp;
+       bool retval = true;
+
+       if (alternate_aperture_size == 0) {
+               /* base > limit disables APE1 */
+               qpd->sh_mem_ape1_base = 1;
+               qpd->sh_mem_ape1_limit = 0;
+       } else {
+               /*
+                * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+                *                      SH_MEM_APE1_BASE[31:0], 0x0000 }
+                * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+                *                      SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+                * Verify that the base and size parameters can be
+                * represented in this format and convert them.
+                * Additionally restrict APE1 to user-mode addresses.
+                */
+
+               uint64_t base = (uintptr_t)alternate_aperture_base;
+               uint64_t limit = base + alternate_aperture_size - 1;
+
+               if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+                  (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+                       retval = false;
+                       goto out;
+               }
+
+               qpd->sh_mem_ape1_base = base >> 16;
+               qpd->sh_mem_ape1_limit = limit >> 16;
+       }
 
        default_mtype = (default_policy == cache_policy_coherent) ?
                        MTYPE_NONCACHED :
@@ -97,37 +135,22 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
                        | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
                        | DEFAULT_MTYPE(default_mtype)
                        | APE1_MTYPE(ape1_mtype);
-
-       return true;
-}
-
-static int update_qpd_cik(struct device_queue_manager *dqm,
-                         struct qcm_process_device *qpd)
-{
-       struct kfd_process_device *pdd;
-       unsigned int temp;
-
-       pdd = qpd_to_pdd(qpd);
-
-       /* check if sh_mem_config register already configured */
-       if (qpd->sh_mem_config == 0) {
-               qpd->sh_mem_config =
-                       ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
-                       DEFAULT_MTYPE(MTYPE_NONCACHED) |
-                       APE1_MTYPE(MTYPE_NONCACHED);
-               qpd->sh_mem_ape1_limit = 0;
-               qpd->sh_mem_ape1_base = 0;
-       }
-
        /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
         * aperture addresses.
         */
-       temp = get_sh_mem_bases_nybble_64(pdd);
+       temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
        qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
 
        pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
                qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
 
+out:
+       return retval;
+}
+
+static int update_qpd_cik(struct device_queue_manager *dqm,
+                         struct qcm_process_device *qpd)
+{
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index b291ee0fab94..320518f41890 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -27,6 +27,14 @@
 #include "gca/gfx_8_0_sh_mask.h"
 #include "oss/oss_3_0_sh_mask.h"
 
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
 static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
                                       struct qcm_process_device *qpd,
                                       enum cache_policy default_policy,
@@ -85,6 +93,36 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
 {
        uint32_t default_mtype;
        uint32_t ape1_mtype;
+       unsigned int temp;
+       bool retval = true;
+
+       if (alternate_aperture_size == 0) {
+               /* base > limit disables APE1 */
+               qpd->sh_mem_ape1_base = 1;
+               qpd->sh_mem_ape1_limit = 0;
+       } else {
+               /*
+                * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+                *                      SH_MEM_APE1_BASE[31:0], 0x0000 }
+                * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+                *                      SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+                * Verify that the base and size parameters can be
+                * represented in this format and convert them.
+                * Additionally restrict APE1 to user-mode addresses.
+                */
+
+               uint64_t base = (uintptr_t)alternate_aperture_base;
+               uint64_t limit = base + alternate_aperture_size - 1;
+
+               if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+                  (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+                       retval = false;
+                       goto out;
+               }
+
+               qpd->sh_mem_ape1_base = base >> 16;
+               qpd->sh_mem_ape1_limit = limit >> 16;
+       }
 
        default_mtype = (default_policy == cache_policy_coherent) ?
                        MTYPE_UC :
@@ -100,40 +138,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
                        default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
                        ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
 
-       return true;
-}
-
-static int update_qpd_vi(struct device_queue_manager *dqm,
-                        struct qcm_process_device *qpd)
-{
-       struct kfd_process_device *pdd;
-       unsigned int temp;
-
-       pdd = qpd_to_pdd(qpd);
-
-       /* check if sh_mem_config register already configured */
-       if (qpd->sh_mem_config == 0) {
-               qpd->sh_mem_config =
-                               SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-                                       SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
-                               MTYPE_UC <<
-                                       SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
-                               MTYPE_UC <<
-                                       SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
-
-               qpd->sh_mem_ape1_limit = 0;
-               qpd->sh_mem_ape1_base = 0;
-       }
-
        /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
         * aperture addresses.
         */
-       temp = get_sh_mem_bases_nybble_64(pdd);
+       temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
        qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
 
        pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
                temp, qpd->sh_mem_bases);
+out:
+       return retval;
+}
 
+static int update_qpd_vi(struct device_queue_manager *dqm,
+                        struct qcm_process_device *qpd)
+{
        return 0;
 }
 
-- 
2.34.1

Reply via email to