On GPUs with a large number of CUs, the sibling_map[32] array in
struct crat_subtype_cache is too small to hold the cache information
when the VCRAT table is created. Fix this by filling the cache
information directly into struct kfd_cache_properties_ext instead.

In addition, a new directory
"/sys/class/kfd/kfd/topology/nodes/*nodes_num*/caches_ext"
is created to expose this cache information.

The original "cache" directory remains reserved for GPUs that use a
real CRAT table.

Signed-off-by: Ma Jun <jun....@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c     | 1229 +-------------------
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1246 ++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   21 +
 3 files changed, 1261 insertions(+), 1235 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 4857ec5b9f46..e6928c60338e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -30,799 +30,6 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 
-/* Static table to describe GPU Cache information */
-struct kfd_gpu_cache_info {
-       uint32_t        cache_size;
-       uint32_t        cache_level;
-       uint32_t        flags;
-       /* Indicates how many Compute Units share this cache
-        * within a SA. Value = 1 indicates the cache is not shared
-        */
-       uint32_t        num_cu_shared;
-};
-
-static struct kfd_gpu_cache_info kaveri_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache (in SQC module) per bank */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache (in SQC module) per bank */
-               .cache_size = 8,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-
-       /* TODO: Add L2 Cache information */
-};
-
-
-static struct kfd_gpu_cache_info carrizo_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache (in SQC module) per bank */
-               .cache_size = 8,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 4,
-       },
-       {
-               /* Scalar L1 Data Cache (in SQC module) per bank. */
-               .cache_size = 4,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 4,
-       },
-
-       /* TODO: Add L2 Cache information */
-};
-
-#define hawaii_cache_info kaveri_cache_info
-#define tonga_cache_info carrizo_cache_info
-#define fiji_cache_info  carrizo_cache_info
-#define polaris10_cache_info carrizo_cache_info
-#define polaris11_cache_info carrizo_cache_info
-#define polaris12_cache_info carrizo_cache_info
-#define vegam_cache_info carrizo_cache_info
-
-/* NOTE: L1 cache information has been updated and L2/L3
- * cache information has been added for Vega10 and
- * newer ASICs. The unit for cache_size is KiB.
- * In future,  check & update cache details
- * for every new ASIC is required.
- */
-
-static struct kfd_gpu_cache_info vega10_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 4096,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 16,
-       },
-};
-
-static struct kfd_gpu_cache_info raven_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 1024,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 11,
-       },
-};
-
-static struct kfd_gpu_cache_info renoir_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 1024,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 8,
-       },
-};
-
-static struct kfd_gpu_cache_info vega12_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 2048,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 5,
-       },
-};
-
-static struct kfd_gpu_cache_info vega20_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 3,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 8192,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 16,
-       },
-};
-
-static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 8192,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 14,
-       },
-};
-
-static struct kfd_gpu_cache_info navi10_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* GL1 Data Cache per SA */
-               .cache_size = 128,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 10,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 4096,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 10,
-       },
-};
-
-static struct kfd_gpu_cache_info vangogh_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* GL1 Data Cache per SA */
-               .cache_size = 128,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 8,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 1024,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 8,
-       },
-};
-
-static struct kfd_gpu_cache_info navi14_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* GL1 Data Cache per SA */
-               .cache_size = 128,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 12,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 2048,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 12,
-       },
-};
-
-static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* GL1 Data Cache per SA */
-               .cache_size = 128,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 10,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 4096,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 10,
-       },
-       {
-               /* L3 Data Cache per GPU */
-               .cache_size = 128*1024,
-               .cache_level = 3,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 10,
-       },
-};
-
-static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* GL1 Data Cache per SA */
-               .cache_size = 128,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 10,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 3072,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 10,
-       },
-       {
-               /* L3 Data Cache per GPU */
-               .cache_size = 96*1024,
-               .cache_level = 3,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 10,
-       },
-};
-
-static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* GL1 Data Cache per SA */
-               .cache_size = 128,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 8,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 2048,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 8,
-       },
-       {
-               /* L3 Data Cache per GPU */
-               .cache_size = 32*1024,
-               .cache_level = 3,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 8,
-       },
-};
-
-static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* GL1 Data Cache per SA */
-               .cache_size = 128,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 8,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 1024,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 8,
-       },
-       {
-               /* L3 Data Cache per GPU */
-               .cache_size = 16*1024,
-               .cache_level = 3,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 8,
-       },
-};
-
-static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* GL1 Data Cache per SA */
-               .cache_size = 128,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 6,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 2048,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 6,
-       },
-};
-
-static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
-       {
-               /* TCP L1 Cache per CU */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 1,
-       },
-       {
-               /* Scalar L1 Instruction Cache per SQC */
-               .cache_size = 32,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_INST_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* Scalar L1 Data Cache per SQC */
-               .cache_size = 16,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* GL1 Data Cache per SA */
-               .cache_size = 128,
-               .cache_level = 1,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-       {
-               /* L2 Data Cache per GPU (Total Tex Cache) */
-               .cache_size = 256,
-               .cache_level = 2,
-               .flags = (CRAT_CACHE_FLAGS_ENABLED |
-                               CRAT_CACHE_FLAGS_DATA_CACHE |
-                               CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 2,
-       },
-};
-
 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
                struct crat_subtype_computeunit *cu)
 {
@@ -1223,419 +430,6 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
        return ret;
 }
 
-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
-                               struct kfd_gpu_cache_info *pcache_info,
-                               struct kfd_cu_info *cu_info,
-                               int mem_available,
-                               int cu_bitmask,
-                               int cache_type, unsigned int cu_processor_id,
-                               int cu_block)
-{
-       unsigned int cu_sibling_map_mask;
-       int first_active_cu;
-
-       /* First check if enough memory is available */
-       if (sizeof(struct crat_subtype_cache) > mem_available)
-               return -ENOMEM;
-
-       cu_sibling_map_mask = cu_bitmask;
-       cu_sibling_map_mask >>= cu_block;
-       cu_sibling_map_mask &=
-               ((1 << pcache_info[cache_type].num_cu_shared) - 1);
-       first_active_cu = ffs(cu_sibling_map_mask);
-
-       /* CU could be inactive. In case of shared cache find the first active
-        * CU. and incase of non-shared cache check if the CU is inactive. If
-        * inactive active skip it
-        */
-       if (first_active_cu) {
-               memset(pcache, 0, sizeof(struct crat_subtype_cache));
-               pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
-               pcache->length = sizeof(struct crat_subtype_cache);
-               pcache->flags = pcache_info[cache_type].flags;
-               pcache->processor_id_low = cu_processor_id
-                                        + (first_active_cu - 1);
-               pcache->cache_level = pcache_info[cache_type].cache_level;
-               pcache->cache_size = pcache_info[cache_type].cache_size;
-
-               /* Sibling map is w.r.t processor_id_low, so shift out
-                * inactive CU
-                */
-               cu_sibling_map_mask =
-                       cu_sibling_map_mask >> (first_active_cu - 1);
-
-               pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
-               pcache->sibling_map[1] =
-                               (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
-               pcache->sibling_map[2] =
-                               (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
-               pcache->sibling_map[3] =
-                               (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
-               return 0;
-       }
-       return 1;
-}
-
-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
-                               struct kfd_gpu_cache_info *pcache_info,
-                               struct kfd_cu_info *cu_info,
-                               int mem_available,
-                               int cache_type, unsigned int cu_processor_id)
-{
-       unsigned int cu_sibling_map_mask;
-       int first_active_cu;
-       int i, j, k;
-
-       /* First check if enough memory is available */
-       if (sizeof(struct crat_subtype_cache) > mem_available)
-               return -ENOMEM;
-
-       cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
-       cu_sibling_map_mask &=
-               ((1 << pcache_info[cache_type].num_cu_shared) - 1);
-       first_active_cu = ffs(cu_sibling_map_mask);
-
-       /* CU could be inactive. In case of shared cache find the first active
-        * CU. and incase of non-shared cache check if the CU is inactive. If
-        * inactive active skip it
-        */
-       if (first_active_cu) {
-               memset(pcache, 0, sizeof(struct crat_subtype_cache));
-               pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
-               pcache->length = sizeof(struct crat_subtype_cache);
-               pcache->flags = pcache_info[cache_type].flags;
-               pcache->processor_id_low = cu_processor_id
-                                        + (first_active_cu - 1);
-               pcache->cache_level = pcache_info[cache_type].cache_level;
-               pcache->cache_size = pcache_info[cache_type].cache_size;
-
-               /* Sibling map is w.r.t processor_id_low, so shift out
-                * inactive CU
-                */
-               cu_sibling_map_mask =
-                       cu_sibling_map_mask >> (first_active_cu - 1);
-               k = 0;
-               for (i = 0; i < cu_info->num_shader_engines; i++) {
-                       for (j = 0; j < cu_info->num_shader_arrays_per_engine;
-                               j++) {
-                               pcache->sibling_map[k] =
-                                (uint8_t)(cu_sibling_map_mask & 0xFF);
-                               pcache->sibling_map[k+1] =
-                                (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
-                               pcache->sibling_map[k+2] =
-                                (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
-                               pcache->sibling_map[k+3] =
-                                (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
-                               k += 4;
-                               cu_sibling_map_mask =
-                                       cu_info->cu_bitmap[i % 4][j + i / 4];
-                               cu_sibling_map_mask &= (
-                                (1 << pcache_info[cache_type].num_cu_shared)
-                                - 1);
-                       }
-               }
-               return 0;
-       }
-       return 1;
-}
-
-#define KFD_MAX_CACHE_TYPES 6
-
-static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
-                                                  struct kfd_gpu_cache_info *pcache_info)
-{
-       struct amdgpu_device *adev = kdev->adev;
-       int i = 0;
-
-       /* TCP L1 Cache per CU */
-       if (adev->gfx.config.gc_tcp_l1_size) {
-               pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
-               pcache_info[i].cache_level = 1;
-               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
-                                       CRAT_CACHE_FLAGS_DATA_CACHE |
-                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
-               pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
-               i++;
-       }
-       /* Scalar L1 Instruction Cache per SQC */
-       if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
-               pcache_info[i].cache_size =
-                       adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
-               pcache_info[i].cache_level = 1;
-               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
-                                       CRAT_CACHE_FLAGS_INST_CACHE |
-                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
-               pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
-               i++;
-       }
-       /* Scalar L1 Data Cache per SQC */
-       if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
-               pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
-               pcache_info[i].cache_level = 1;
-               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
-                                       CRAT_CACHE_FLAGS_DATA_CACHE |
-                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
-               pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
-               i++;
-       }
-       /* GL1 Data Cache per SA */
-       if (adev->gfx.config.gc_gl1c_per_sa &&
-           adev->gfx.config.gc_gl1c_size_per_instance) {
-               pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
-                       adev->gfx.config.gc_gl1c_size_per_instance;
-               pcache_info[i].cache_level = 1;
-               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
-                                       CRAT_CACHE_FLAGS_DATA_CACHE |
-                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
-               pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
-               i++;
-       }
-       /* L2 Data Cache per GPU (Total Tex Cache) */
-       if (adev->gfx.config.gc_gl2c_per_gpu) {
-               pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
-               pcache_info[i].cache_level = 2;
-               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
-                                       CRAT_CACHE_FLAGS_DATA_CACHE |
-                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
-               pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
-               i++;
-       }
-       /* L3 Data Cache per GPU */
-       if (adev->gmc.mall_size) {
-               pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
-               pcache_info[i].cache_level = 3;
-               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
-                                       CRAT_CACHE_FLAGS_DATA_CACHE |
-                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
-               pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
-               i++;
-       }
-       return i;
-}
-
-/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
- * tables
- *
- *     @kdev - [IN] GPU device
- *     @gpu_processor_id - [IN] GPU processor ID to which these caches
- *                         associate
- *     @available_size - [IN] Amount of memory available in pcache
- *     @cu_info - [IN] Compute Unit info obtained from KGD
- *     @pcache - [OUT] memory into which cache data is to be filled in.
- *     @size_filled - [OUT] amount of data used up in pcache.
- *     @num_of_entries - [OUT] number of caches added
- */
-static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
-                       int gpu_processor_id,
-                       int available_size,
-                       struct kfd_cu_info *cu_info,
-                       struct crat_subtype_cache *pcache,
-                       int *size_filled,
-                       int *num_of_entries)
-{
-       struct kfd_gpu_cache_info *pcache_info;
-       struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
-       int num_of_cache_types = 0;
-       int i, j, k;
-       int ct = 0;
-       int mem_available = available_size;
-       unsigned int cu_processor_id;
-       int ret;
-       unsigned int num_cu_shared;
-
-       switch (kdev->adev->asic_type) {
-       case CHIP_KAVERI:
-               pcache_info = kaveri_cache_info;
-               num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
-               break;
-       case CHIP_HAWAII:
-               pcache_info = hawaii_cache_info;
-               num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
-               break;
-       case CHIP_CARRIZO:
-               pcache_info = carrizo_cache_info;
-               num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
-               break;
-       case CHIP_TONGA:
-               pcache_info = tonga_cache_info;
-               num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
-               break;
-       case CHIP_FIJI:
-               pcache_info = fiji_cache_info;
-               num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
-               break;
-       case CHIP_POLARIS10:
-               pcache_info = polaris10_cache_info;
-               num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
-               break;
-       case CHIP_POLARIS11:
-               pcache_info = polaris11_cache_info;
-               num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
-               break;
-       case CHIP_POLARIS12:
-               pcache_info = polaris12_cache_info;
-               num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
-               break;
-       case CHIP_VEGAM:
-               pcache_info = vegam_cache_info;
-               num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
-               break;
-       default:
-               switch (KFD_GC_VERSION(kdev)) {
-               case IP_VERSION(9, 0, 1):
-                       pcache_info = vega10_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
-                       break;
-               case IP_VERSION(9, 2, 1):
-                       pcache_info = vega12_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
-                       break;
-               case IP_VERSION(9, 4, 0):
-               case IP_VERSION(9, 4, 1):
-                       pcache_info = vega20_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
-                       break;
-               case IP_VERSION(9, 4, 2):
-                       pcache_info = aldebaran_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
-                       break;
-               case IP_VERSION(9, 1, 0):
-               case IP_VERSION(9, 2, 2):
-                       pcache_info = raven_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(raven_cache_info);
-                       break;
-               case IP_VERSION(9, 3, 0):
-                       pcache_info = renoir_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
-                       break;
-               case IP_VERSION(10, 1, 10):
-               case IP_VERSION(10, 1, 2):
-               case IP_VERSION(10, 1, 3):
-               case IP_VERSION(10, 1, 4):
-                       pcache_info = navi10_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
-                       break;
-               case IP_VERSION(10, 1, 1):
-                       pcache_info = navi14_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
-                       break;
-               case IP_VERSION(10, 3, 0):
-                       pcache_info = sienna_cichlid_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
-                       break;
-               case IP_VERSION(10, 3, 2):
-                       pcache_info = navy_flounder_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
-                       break;
-               case IP_VERSION(10, 3, 4):
-                       pcache_info = dimgrey_cavefish_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
-                       break;
-               case IP_VERSION(10, 3, 1):
-                       pcache_info = vangogh_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
-                       break;
-               case IP_VERSION(10, 3, 5):
-                       pcache_info = beige_goby_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
-                       break;
-               case IP_VERSION(10, 3, 3):
-               case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */
-                       pcache_info = yellow_carp_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
-                       break;
-               case IP_VERSION(10, 3, 6):
-                       pcache_info = gc_10_3_6_cache_info;
-                       num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
-                       break;
-               case IP_VERSION(11, 0, 0):
-               case IP_VERSION(11, 0, 1):
-               case IP_VERSION(11, 0, 2):
-               case IP_VERSION(11, 0, 3):
-                       pcache_info = cache_info;
-                       num_of_cache_types =
-                               kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
-                       break;
-               default:
-                       return -EINVAL;
-               }
-       }
-
-       *size_filled = 0;
-       *num_of_entries = 0;
-
-       /* For each type of cache listed in the kfd_gpu_cache_info table,
-        * go through all available Compute Units.
-        * The [i,j,k] loop will
-        *              if kfd_gpu_cache_info.num_cu_shared = 1
-        *                      will parse through all available CU
-        *              If (kfd_gpu_cache_info.num_cu_shared != 1)
-        *                      then it will consider only one CU from
-        *                      the shared unit
-        */
-
-       for (ct = 0; ct < num_of_cache_types; ct++) {
-         cu_processor_id = gpu_processor_id;
-         if (pcache_info[ct].cache_level == 1) {
-           for (i = 0; i < cu_info->num_shader_engines; i++) {
-             for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
-               for (k = 0; k < cu_info->num_cu_per_sh;
-                 k += pcache_info[ct].num_cu_shared) {
-                 ret = fill_in_l1_pcache(pcache,
-                                       pcache_info,
-                                       cu_info,
-                                       mem_available,
-                                       cu_info->cu_bitmap[i % 4][j + i / 4],
-                                       ct,
-                                       cu_processor_id,
-                                       k);
-
-                 if (ret < 0)
-                       break;
-
-                 if (!ret) {
-                               pcache++;
-                               (*num_of_entries)++;
-                               mem_available -= sizeof(*pcache);
-                               (*size_filled) += sizeof(*pcache);
-                 }
-
-                 /* Move to next CU block */
-                 num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
-                                       cu_info->num_cu_per_sh) ?
-                                       pcache_info[ct].num_cu_shared :
-                                       (cu_info->num_cu_per_sh - k);
-                 cu_processor_id += num_cu_shared;
-               }
-             }
-           }
-         } else {
-                       ret = fill_in_l2_l3_pcache(pcache,
-                               pcache_info,
-                               cu_info,
-                               mem_available,
-                               ct,
-                               cu_processor_id);
-
-                       if (ret < 0)
-                               break;
-
-                       if (!ret) {
-                               pcache++;
-                               (*num_of_entries)++;
-                               mem_available -= sizeof(*pcache);
-                               (*size_filled) += sizeof(*pcache);
-                       }
-         }
-       }
-
-       pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
-
-       return 0;
-}
-
 static bool kfd_ignore_crat(void)
 {
        bool ret;
@@ -2203,8 +997,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
        struct crat_subtype_computeunit *cu;
        struct kfd_cu_info cu_info;
        int avail_size = *size;
-       int num_of_cache_entries = 0;
-       int cache_mem_filled = 0;
        uint32_t nid = 0;
        int ret = 0;
 
@@ -2304,31 +1096,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
        crat_table->length += sizeof(struct crat_subtype_memory);
        crat_table->total_entries++;
 
-       /* TODO: Fill in cache information. This information is NOT readily
-        * available in KGD
-        */
-       sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
-               sub_type_hdr->length);
-       ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
-                               avail_size,
-                               &cu_info,
-                               (struct crat_subtype_cache *)sub_type_hdr,
-                               &cache_mem_filled,
-                               &num_of_cache_entries);
-
-       if (ret < 0)
-               return ret;
-
-       crat_table->length += cache_mem_filled;
-       crat_table->total_entries += num_of_cache_entries;
-       avail_size -= cache_mem_filled;
-
        /* Fill in Subtype: IO_LINKS
         *  Only direct links are added here which is Link from GPU to
         *  its NUMA node. Indirect links are added by userspace.
         */
        sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
-               cache_mem_filled);
+               sub_type_hdr->length);
        ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
                (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index e0680d265a66..97e88c35be01 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -50,6 +50,747 @@ static struct kfd_system_properties sys_props;
 static DECLARE_RWSEM(topology_lock);
 static uint32_t topology_crat_proximity_domain;
 
+/* One row of a static per-ASIC table describing a GPU cache */
+struct kfd_gpu_cache_info {
+       uint32_t        cache_size;     /* KiB per the Vega10+ table note */
+       uint32_t        cache_level;    /* hierarchy level: 1 = L1, 2 = L2 */
+       uint32_t        flags;          /* OR of CRAT_CACHE_FLAGS_* bits */
+       /* Indicates how many Compute Units share this cache
+        * within a SA. Value = 1 indicates the cache is not shared
+        */
+       uint32_t        num_cu_shared;
+};
+
+static struct kfd_gpu_cache_info kaveri_cache_info[] = { /* also hawaii_cache_info */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16, /* presumably KiB - TODO confirm for pre-Vega10 */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* not shared between CUs */
+       },
+       {
+               /* Scalar L1 Instruction Cache (in SQC module) per bank */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2, /* shared by 2 CUs */
+       },
+       {
+               /* Scalar L1 Data Cache (in SQC module) per bank */
+               .cache_size = 8,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2, /* shared by 2 CUs */
+       },
+
+       /* TODO: Add L2 Cache information (this table lists L1 caches only) */
+};
+
+static struct kfd_gpu_cache_info carrizo_cache_info[] = { /* shared via the aliases below */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16, /* presumably KiB - TODO confirm for pre-Vega10 */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* not shared between CUs */
+       },
+       {
+               /* Scalar L1 Instruction Cache (in SQC module) per bank */
+               .cache_size = 8,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 4, /* shared by 4 CUs */
+       },
+       {
+               /* Scalar L1 Data Cache (in SQC module) per bank. */
+               .cache_size = 4,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 4, /* shared by 4 CUs */
+       },
+
+       /* TODO: Add L2 Cache information (this table lists L1 caches only) */
+};
+
+#define hawaii_cache_info kaveri_cache_info /* Hawaii reuses the Kaveri table */
+#define tonga_cache_info carrizo_cache_info /* the rest reuse the Carrizo table */
+#define fiji_cache_info  carrizo_cache_info
+#define polaris10_cache_info carrizo_cache_info
+#define polaris11_cache_info carrizo_cache_info
+#define polaris12_cache_info carrizo_cache_info
+#define vegam_cache_info carrizo_cache_info
+
+/* NOTE: L1 cache information has been updated and L2/L3
+ * cache information has been added for Vega10 and
+ * newer ASICs. The unit for cache_size is KiB.
+ * Cache details must be checked and updated
+ * for every new ASIC.
+ */
+static struct kfd_gpu_cache_info vega10_cache_info[] = { /* 3 L1 entries + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* not shared between CUs */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 4096, /* KiB */
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 16, /* CUs sharing it within a SA */
+       },
+};
+static struct kfd_gpu_cache_info raven_cache_info[] = { /* 3 L1 entries + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* not shared between CUs */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 1024, /* KiB */
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 11, /* CUs sharing it within a SA */
+       },
+};
+
+static struct kfd_gpu_cache_info renoir_cache_info[] = { /* 3 L1 entries + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* not shared between CUs */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 1024, /* KiB */
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8, /* CUs sharing it within a SA */
+       },
+};
+
+static struct kfd_gpu_cache_info vega12_cache_info[] = { /* 3 L1 entries + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* not shared between CUs */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 2048, /* KiB */
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 5, /* CUs sharing it within a SA */
+       },
+};
+
+static struct kfd_gpu_cache_info vega20_cache_info[] = { /* 3 L1 entries + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* not shared between CUs */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16, /* KiB */
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3, /* CUs per SQC */
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 8192, /* KiB */
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 16, /* CUs sharing it within a SA */
+       },
+};
+
+static struct kfd_gpu_cache_info aldebaran_cache_info[] = { /* L1 x3 + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* 1 == not shared */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 8192,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 14,
+       },
+};
+
+static struct kfd_gpu_cache_info navi10_cache_info[] = { /* L1 x4 + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* 1 == not shared */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 4096,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+};
+
+static struct kfd_gpu_cache_info vangogh_cache_info[] = { /* L1 x4 + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* 1 == not shared */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 1024,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+};
+
+static struct kfd_gpu_cache_info navi14_cache_info[] = { /* L1 x4 + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* 1 == not shared */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 12,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 2048,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 12,
+       },
+};
+
+static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { /* L1 x4 + L2 + L3 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* 1 == not shared */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 4096,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+       {
+               /* L3 Data Cache per GPU */
+               .cache_size = 128*1024,
+               .cache_level = 3,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+};
+
+static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { /* L1 x4 + L2 + L3 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* 1 == not shared */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 3072,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+       {
+               /* L3 Data Cache per GPU */
+               .cache_size = 96*1024,
+               .cache_level = 3,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+};
+
+static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { /* L1 x4 + L2 + L3 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* 1 == not shared */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 2048,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+       {
+               /* L3 Data Cache per GPU */
+               .cache_size = 32*1024,
+               .cache_level = 3,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+};
+
+static struct kfd_gpu_cache_info beige_goby_cache_info[] = { /* L1 x4 + L2 + L3 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* 1 == not shared */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 1024,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+       {
+               /* L3 Data Cache per GPU */
+               .cache_size = 16*1024,
+               .cache_level = 3,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+};
+static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { /* L1 x4 + L2 */
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1, /* 1 == not shared */
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 6,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 2048,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 6,
+       },
+};
+
 struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
                                                uint32_t proximity_domain)
 {
@@ -149,6 +890,7 @@ static void kfd_release_topology_device(struct 
kfd_topology_device *dev)
 {
        struct kfd_mem_properties *mem;
        struct kfd_cache_properties *cache;
+       struct kfd_cache_properties_ext *cache_ext;
        struct kfd_iolink_properties *iolink;
        struct kfd_iolink_properties *p2plink;
 #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
@@ -171,6 +913,13 @@ static void kfd_release_topology_device(struct 
kfd_topology_device *dev)
                kfree(cache);
        }
 
+       while (dev->cache_props_ext.next != &dev->cache_props_ext) {
+               cache_ext = container_of(dev->cache_props_ext.next,
+                               struct kfd_cache_properties_ext, list);
+               list_del(&cache_ext->list);
+               kfree(cache_ext);
+       }
+
        while (dev->io_link_props.next != &dev->io_link_props) {
                iolink = container_of(dev->io_link_props.next,
                                struct kfd_iolink_properties, list);
@@ -227,6 +976,7 @@ struct kfd_topology_device *kfd_create_topology_device(
 
        INIT_LIST_HEAD(&dev->mem_props);
        INIT_LIST_HEAD(&dev->cache_props);
+       INIT_LIST_HEAD(&dev->cache_props_ext);
        INIT_LIST_HEAD(&dev->io_link_props);
        INIT_LIST_HEAD(&dev->p2p_link_props);
 #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
@@ -387,7 +1137,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct 
attribute *attr,
 
        /* Making sure that the buffer is an empty string */
        buffer[0] = 0;
-
        cache = container_of(attr, struct kfd_cache_properties, attr);
        if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
                return -EPERM;
@@ -423,6 +1172,50 @@ static struct kobj_type cache_type = {
        .sysfs_ops = &cache_ops,
 };
 
+/*
+ * sysfs show handler for the attribute embedded in a
+ * struct kfd_cache_properties_ext.
+ *
+ * Writes the scalar cache properties through sysfs_show_32bit_prop(),
+ * followed by a "sibling_map" line holding one "0,"/"1," item per bit,
+ * lowest bit of sibling_map[0] first.
+ *
+ * Returns the number of bytes placed in @buffer, or -EPERM when the cache
+ * has a GPU attached and kfd_devcgroup_check_permission() rejects it.
+ */
+static ssize_t kfd_cache_ext_show(struct kobject *kobj, struct attribute *attr,
+               char *buffer)
+{
+       int offs = 0;
+       uint32_t i, j;
+       struct kfd_cache_properties_ext *cache;
+
+       /* Making sure that the buffer is an empty string */
+       buffer[0] = 0;
+       cache = container_of(attr, struct kfd_cache_properties_ext, attr);
+       if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
+               return -EPERM;
+       sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
+                       cache->processor_id_low);
+       sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
+       sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
+       sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
+                             cache->cacheline_size);
+       sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
+                             cache->cachelines_per_tag);
+       sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
+       sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
+       sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
+
+       /*
+        * scnprintf() returns the number of bytes actually stored, so offs
+        * stays below PAGE_SIZE however long the map is. snprintf() returns
+        * the would-be length, which would let offs overshoot the page and
+        * turn PAGE_SIZE - offs into a huge size.
+        */
+       offs += scnprintf(buffer + offs, PAGE_SIZE - offs, "sibling_map ");
+
+       /*
+        * NOTE(review): the bound is CRAT_SIBLINGMAP_SIZE; confirm it matches
+        * the size of kfd_cache_properties_ext::sibling_map, otherwise the
+        * map is only partly shown (or read past its end).
+        */
+       for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
+               for (j = 0; j < sizeof(cache->sibling_map[0]) * 8; j++)
+                       /* Check each bit */
+                       offs += scnprintf(buffer + offs, PAGE_SIZE - offs,
+                                         "%d,",
+                                         (cache->sibling_map[i] >> j) & 1);
+
+       /* Replace the last "," with end of line */
+       buffer[offs - 1] = '\n';
+       return offs;
+}
+
+static const struct sysfs_ops cache_ext_ops = {
+       .show = kfd_cache_ext_show, /* only .show is set: no store handler */
+};
+
+static struct kobj_type cache_ext_type = {
+       .release = kfd_topology_kobj_release,
+       .sysfs_ops = &cache_ext_ops, /* reads go to kfd_cache_ext_show() */
+};
+
 #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
 /****** Sysfs of Performance Counters ******/
 
@@ -610,6 +1403,7 @@ static void kfd_remove_sysfs_node_entry(struct 
kfd_topology_device *dev)
        struct kfd_iolink_properties *p2plink;
        struct kfd_iolink_properties *iolink;
        struct kfd_cache_properties *cache;
+       struct kfd_cache_properties_ext *cache_ext;
        struct kfd_mem_properties *mem;
 #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
        struct kfd_perf_properties *perf;
@@ -663,6 +1457,18 @@ static void kfd_remove_sysfs_node_entry(struct 
kfd_topology_device *dev)
                dev->kobj_cache = NULL;
        }
 
+       if (dev->kobj_cache_ext) {
+               list_for_each_entry(cache_ext, &dev->cache_props_ext, list)
+                       if (cache_ext->kobj) {
+                               kfd_remove_sysfs_file(cache_ext->kobj,
+                                                       &cache_ext->attr);
+                               cache_ext->kobj = NULL;
+                       }
+               kobject_del(dev->kobj_cache_ext);
+               kobject_put(dev->kobj_cache_ext);
+               dev->kobj_cache_ext = NULL;
+       }
+
        if (dev->kobj_mem) {
                list_for_each_entry(mem, &dev->mem_props, list)
                        if (mem->kobj) {
@@ -707,6 +1513,7 @@ static int kfd_build_sysfs_node_entry(struct 
kfd_topology_device *dev,
        struct kfd_iolink_properties *p2plink;
        struct kfd_iolink_properties *iolink;
        struct kfd_cache_properties *cache;
+       struct kfd_cache_properties_ext *cache_ext;
        struct kfd_mem_properties *mem;
 #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
        struct kfd_perf_properties *perf;
@@ -741,6 +1548,10 @@ static int kfd_build_sysfs_node_entry(struct 
kfd_topology_device *dev,
        if (!dev->kobj_cache)
                return -ENOMEM;
 
+       dev->kobj_cache_ext = kobject_create_and_add("caches_ext", 
dev->kobj_node);
+       if (!dev->kobj_cache_ext)
+               return -ENOMEM;
+
        dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
        if (!dev->kobj_iolink)
                return -ENOMEM;
@@ -830,6 +1641,28 @@ static int kfd_build_sysfs_node_entry(struct 
kfd_topology_device *dev,
                i++;
        }
 
+       i = 0;
+       list_for_each_entry(cache_ext, &dev->cache_props_ext, list) {
+               cache_ext->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+               if (!cache_ext->kobj)
+                       return -ENOMEM;
+               ret = kobject_init_and_add(cache_ext->kobj, &cache_ext_type,
+                               dev->kobj_cache_ext, "%d", i);
+               if (ret < 0) {
+                       kobject_put(cache_ext->kobj);
+                       return ret;
+               }
+
+               cache_ext->attr.name = "properties";
+               cache_ext->attr.mode = KFD_SYSFS_FILE_MODE;
+               sysfs_attr_init(&cache_ext->attr);
+               ret = sysfs_create_file(cache_ext->kobj, &cache_ext->attr);
+               if (ret < 0)
+                       return ret;
+               i++;
+       }
+
+
        i = 0;
        list_for_each_entry(iolink, &dev->io_link_props, list) {
                iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
@@ -1268,6 +2101,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct 
kfd_dev *gpu)
        struct kfd_topology_device *out_dev = NULL;
        struct kfd_mem_properties *mem;
        struct kfd_cache_properties *cache;
+       struct kfd_cache_properties_ext *cache_ext;
        struct kfd_iolink_properties *iolink;
        struct kfd_iolink_properties *p2plink;
 
@@ -1288,6 +2122,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct 
kfd_dev *gpu)
                                mem->gpu = dev->gpu;
                        list_for_each_entry(cache, &dev->cache_props, list)
                                cache->gpu = dev->gpu;
+                       list_for_each_entry(cache_ext, &dev->cache_props_ext, list)
+                               cache_ext->gpu = dev->gpu;
                        list_for_each_entry(iolink, &dev->io_link_props, list)
                                iolink->gpu = dev->gpu;
                        list_for_each_entry(p2plink, &dev->p2p_link_props, list)
@@ -1721,6 +2557,397 @@ static void kfd_topology_set_capabilities(struct 
kfd_topology_device *dev)
                dev->node_props.capability |= 
HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED;
 }
 
+/*
+ * Helper function. See kfd_fill_gpu_cache_info for parameter description.
+ *
+ * Builds the extended cache properties for pcache_info[cache_type], looking
+ * at the CUs of @cu_bitmask starting at bit @cu_block. @cu_info is not used
+ * by this helper.
+ *
+ * Returns 0 and stores the newly allocated entry in *@props_ext on success,
+ * 1 when none of the CUs sharing this cache is active (nothing is allocated
+ * and *@props_ext is left untouched), or -ENOMEM if the allocation fails.
+ */
+static int fill_in_l1_pcache(struct kfd_cache_properties_ext **props_ext,
+                               struct kfd_gpu_cache_info *pcache_info,
+                               struct kfd_cu_info *cu_info,
+                               int cu_bitmask,
+                               int cache_type, unsigned int cu_processor_id,
+                               int cu_block)
+{
+       const struct kfd_gpu_cache_info *info = &pcache_info[cache_type];
+       struct kfd_cache_properties_ext *pcache;
+       unsigned int cu_sibling_map_mask;
+       unsigned int share_mask;
+       int first_active_cu;
+       int byte;
+
+       /*
+        * One bit per CU sharing the cache. The shift is unsigned and
+        * saturates at 32 CUs so a large num_cu_shared cannot cause an
+        * undefined shift.
+        */
+       if (info->num_cu_shared >= 32)
+               share_mask = ~0U;
+       else
+               share_mask = (1U << info->num_cu_shared) - 1;
+
+       cu_sibling_map_mask = cu_bitmask;
+       cu_sibling_map_mask >>= cu_block;
+       cu_sibling_map_mask &= share_mask;
+       first_active_cu = ffs(cu_sibling_map_mask);
+
+       /* CUs can be inactive. For a shared cache, use the first active CU;
+        * if no CU sharing the cache is active (for a non-shared cache: its
+        * only CU is inactive), skip the cache.
+        */
+       if (!first_active_cu)
+               return 1;
+
+       pcache = kfd_alloc_struct(pcache);
+       if (!pcache)
+               return -ENOMEM;
+
+       memset(pcache, 0, sizeof(*pcache));
+       pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
+       pcache->cache_level = info->cache_level;
+       pcache->cache_size = info->cache_size;
+
+       /* Translate the CRAT flag bits into HSA cache type bits */
+       if (info->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+               pcache->cache_type |= HSA_CACHE_TYPE_DATA;
+       if (info->flags & CRAT_CACHE_FLAGS_INST_CACHE)
+               pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+       if (info->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+               pcache->cache_type |= HSA_CACHE_TYPE_CPU;
+       if (info->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+               pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+       /* Sibling map is w.r.t processor_id_low, so shift out
+        * inactive CU
+        */
+       cu_sibling_map_mask >>= first_active_cu - 1;
+
+       /* Store the 32-bit mask in the first four bytes, low byte first */
+       for (byte = 0; byte < 4; byte++)
+               pcache->sibling_map[byte] =
+                       (uint8_t)(cu_sibling_map_mask >> (8 * byte));
+
+       *props_ext = pcache;
+
+       return 0;
+}
+
+/*
+ * Helper function. See kfd_fill_gpu_cache_info for parameter description.
+ *
+ * Builds the extended cache properties for pcache_info[cache_type] and
+ * fills its sibling map with four bytes per (shader engine, shader array)
+ * pair taken from @cu_info.
+ *
+ * Only cu_info->cu_bitmap[0][0] decides whether an entry is created.
+ *
+ * Returns 0 and stores the newly allocated entry in *@props_ext on success,
+ * 1 when none of the examined CUs is active (nothing is allocated and
+ * *@props_ext is left untouched), or -ENOMEM if the allocation fails.
+ */
+static int fill_in_l2_l3_pcache(struct kfd_cache_properties_ext **props_ext,
+                               struct kfd_gpu_cache_info *pcache_info,
+                               struct kfd_cu_info *cu_info,
+                               int cache_type, unsigned int cu_processor_id)
+{
+       const struct kfd_gpu_cache_info *info = &pcache_info[cache_type];
+       struct kfd_cache_properties_ext *pcache;
+       unsigned int cu_sibling_map_mask;
+       unsigned int share_mask;
+       int first_active_cu;
+       int i, j, k, byte;
+
+       /*
+        * One bit per CU sharing the cache. The shift is unsigned and
+        * saturates at 32 CUs so a large num_cu_shared cannot cause an
+        * undefined shift.
+        */
+       if (info->num_cu_shared >= 32)
+               share_mask = ~0U;
+       else
+               share_mask = (1U << info->num_cu_shared) - 1;
+
+       cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
+       cu_sibling_map_mask &= share_mask;
+       first_active_cu = ffs(cu_sibling_map_mask);
+
+       /* CUs can be inactive. For a shared cache, use the first active CU;
+        * if no CU sharing the cache is active (for a non-shared cache: its
+        * only CU is inactive), skip the cache.
+        */
+       if (!first_active_cu)
+               return 1;
+
+       pcache = kfd_alloc_struct(pcache);
+       if (!pcache)
+               return -ENOMEM;
+
+       memset(pcache, 0, sizeof(*pcache));
+       pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
+       pcache->cache_level = info->cache_level;
+       pcache->cache_size = info->cache_size;
+
+       /* Translate the CRAT flag bits into HSA cache type bits */
+       if (info->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+               pcache->cache_type |= HSA_CACHE_TYPE_DATA;
+       if (info->flags & CRAT_CACHE_FLAGS_INST_CACHE)
+               pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+       if (info->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+               pcache->cache_type |= HSA_CACHE_TYPE_CPU;
+       if (info->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+               pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+       /* Sibling map is w.r.t processor_id_low, so shift out
+        * inactive CU
+        */
+       cu_sibling_map_mask >>= first_active_cu - 1;
+       k = 0;
+
+       /*
+        * NOTE(review): each pass stores the mask loaded by the previous
+        * pass, then loads the mask for the current (i, j). Slot 0 therefore
+        * holds the shifted cu_bitmap[0][0] mask, slot 1 the unshifted
+        * cu_bitmap[0][0] mask again, and the mask loaded in the final pass
+        * is never stored. Ordering kept as-is; confirm this is intended.
+        *
+        * NOTE(review): 4 bytes are written per (i, j) pair; confirm that
+        * sibling_map has room for 4 * num_shader_engines *
+        * num_shader_arrays_per_engine bytes.
+        */
+       for (i = 0; i < cu_info->num_shader_engines; i++) {
+               for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+                       /* Store the 32-bit mask low byte first */
+                       for (byte = 0; byte < 4; byte++)
+                               pcache->sibling_map[k + byte] = (uint8_t)
+                                       (cu_sibling_map_mask >> (8 * byte));
+                       k += 4;
+
+                       cu_sibling_map_mask =
+                               cu_info->cu_bitmap[i % 4][j + i / 4];
+                       cu_sibling_map_mask &= share_mask;
+               }
+       }
+       *props_ext = pcache;
+       return 0;
+}
+
+#define KFD_MAX_CACHE_TYPES 6
+
+/* kfd_fill_gpu_cache_info_from_gfx_config - Build the GPU cache table from
+ * the values held in adev->gfx.config and adev->gmc
+ *
+ *     @kdev: KFD device whose amdgpu device supplies the configuration
+ *     @pcache_info: [OUT] table to fill. Up to six entries are written, so
+ *             the caller must provide room for KFD_MAX_CACHE_TYPES entries
+ *
+ * A cache type is emitted only when its size field(s) are non-zero, so the
+ * table is packed and may hold fewer than KFD_MAX_CACHE_TYPES entries.
+ *
+ *     Return the number of entries written to @pcache_info
+ */
+static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+                                                  struct kfd_gpu_cache_info *pcache_info)
+{
+       struct amdgpu_device *adev = kdev->adev;
+       int i = 0;
+
+       /* TCP L1 Cache per CU */
+       if (adev->gfx.config.gc_tcp_l1_size) {
+               pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
+               pcache_info[i].cache_level = 1;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_DATA_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+               /* Index with i like every other field so the entry stays
+                * correct if another cache type is ever added ahead of it
+                */
+               pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+               i++;
+       }
+       /* Scalar L1 Instruction Cache per SQC */
+       if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+               pcache_info[i].cache_size =
+                       adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+               pcache_info[i].cache_level = 1;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_INST_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+               pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+               i++;
+       }
+       /* Scalar L1 Data Cache per SQC */
+       if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+               pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+               pcache_info[i].cache_level = 1;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_DATA_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+               pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+               i++;
+       }
+       /* GL1 Data Cache per SA */
+       if (adev->gfx.config.gc_gl1c_per_sa &&
+               adev->gfx.config.gc_gl1c_size_per_instance) {
+               pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
+                       adev->gfx.config.gc_gl1c_size_per_instance;
+               pcache_info[i].cache_level = 1;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_DATA_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+               pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+               i++;
+       }
+       /* L2 Data Cache per GPU (Total Tex Cache) */
+       if (adev->gfx.config.gc_gl2c_per_gpu) {
+               pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+               pcache_info[i].cache_level = 2;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_DATA_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+               pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+               i++;
+       }
+       /* L3 Data Cache per GPU */
+       if (adev->gmc.mall_size) {
+               /* NOTE(review): the division presumably converts bytes to
+                * KB to match the other cache_size values - confirm
+                */
+               pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+               pcache_info[i].cache_level = 3;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_DATA_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+               pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+               i++;
+       }
+       return i;
+}
+/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
+ * tables
+ *
+ *     @dev: topology device whose cache_props_ext list receives the entries
+ *     @kdev: KFD device the cache information is generated for
+ *
+ * The cache table is chosen by ASIC type, or by GC IP version for newer
+ * parts; for GC 11.0.x it is built at run time from the gfx configuration.
+ * Results of the fill helpers are interpreted as: negative is an error,
+ * 0 means an entry was allocated and must be linked, and a positive value
+ * means no entry was produced for that CU block.
+ *
+ * Entries linked before a failure are left on @dev->cache_props_ext.
+ *
+ *     Return 0 if successful, -EINVAL for an unknown ASIC, or the negative
+ *     error code reported by a fill helper
+ */
+static int kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev)
+{
+       struct kfd_gpu_cache_info *pcache_info = NULL;
+       struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
+       int num_of_cache_types = 0;
+       int i, j, k;
+       int ct = 0;
+       unsigned int cu_processor_id;
+       int ret;
+       unsigned int num_cu_shared;
+       struct kfd_cu_info cu_info;
+       struct kfd_cu_info *pcu_info;
+       int gpu_processor_id;
+       struct kfd_cache_properties_ext *props_ext;
+       int num_of_entries = 0;
+
+       amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
+       pcu_info = &cu_info;
+
+       gpu_processor_id = kdev->processor_id_low;
+
+       switch (kdev->adev->asic_type) {
+       case CHIP_KAVERI:
+               pcache_info = kaveri_cache_info;
+               num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
+               break;
+       case CHIP_HAWAII:
+               pcache_info = hawaii_cache_info;
+               num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
+               break;
+       case CHIP_CARRIZO:
+               pcache_info = carrizo_cache_info;
+               num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
+               break;
+       case CHIP_TONGA:
+               pcache_info = tonga_cache_info;
+               num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
+               break;
+       case CHIP_FIJI:
+               pcache_info = fiji_cache_info;
+               num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
+               break;
+       case CHIP_POLARIS10:
+               pcache_info = polaris10_cache_info;
+               num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
+               break;
+       case CHIP_POLARIS11:
+               pcache_info = polaris11_cache_info;
+               num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
+               break;
+       case CHIP_POLARIS12:
+               pcache_info = polaris12_cache_info;
+               num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
+               break;
+       case CHIP_VEGAM:
+               pcache_info = vegam_cache_info;
+               num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
+               break;
+       default:
+               switch (KFD_GC_VERSION(kdev)) {
+               case IP_VERSION(9, 0, 1):
+                       pcache_info = vega10_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+                       break;
+               case IP_VERSION(9, 2, 1):
+                       pcache_info = vega12_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+                       break;
+               case IP_VERSION(9, 4, 0):
+               case IP_VERSION(9, 4, 1):
+                       pcache_info = vega20_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+                       break;
+               case IP_VERSION(9, 4, 2):
+                       pcache_info = aldebaran_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
+                       break;
+               case IP_VERSION(9, 1, 0):
+               case IP_VERSION(9, 2, 2):
+                       pcache_info = raven_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+                       break;
+               case IP_VERSION(9, 3, 0):
+                       pcache_info = renoir_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+                       break;
+               case IP_VERSION(10, 1, 10):
+               case IP_VERSION(10, 1, 2):
+               case IP_VERSION(10, 1, 3):
+               case IP_VERSION(10, 1, 4):
+                       pcache_info = navi10_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+                       break;
+               case IP_VERSION(10, 1, 1):
+                       pcache_info = navi14_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+                       break;
+               case IP_VERSION(10, 3, 0):
+                       pcache_info = sienna_cichlid_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+                       break;
+               case IP_VERSION(10, 3, 2):
+                       pcache_info = navy_flounder_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+                       break;
+               case IP_VERSION(10, 3, 4):
+                       pcache_info = dimgrey_cavefish_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
+                       break;
+               case IP_VERSION(10, 3, 1):
+                       pcache_info = vangogh_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
+                       break;
+               case IP_VERSION(10, 3, 5):
+                       pcache_info = beige_goby_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
+                       break;
+               case IP_VERSION(10, 3, 3):
+               case IP_VERSION(10, 3, 6): /* TODO: Double check these on production silicon */
+               case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */
+                       pcache_info = yellow_carp_cache_info;
+                       num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
+                       break;
+               case IP_VERSION(11, 0, 0):
+               case IP_VERSION(11, 0, 1):
+               case IP_VERSION(11, 0, 2):
+               case IP_VERSION(11, 0, 3):
+                       /* No static table: derive the entries from the gfx
+                        * configuration into the on-stack cache_info array
+                        */
+                       pcache_info = cache_info;
+                       num_of_cache_types =
+                               kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       /* For each type of cache listed in the kfd_gpu_cache_info table,
+        * go through all available Compute Units.
+        * The [i,j,k] loop will
+        *              if kfd_gpu_cache_info.num_cu_shared = 1
+        *                      will parse through all available CU
+        *              If (kfd_gpu_cache_info.num_cu_shared != 1)
+        *                      then it will consider only one CU from
+        *                      the shared unit
+        */
+       for (ct = 0; ct < num_of_cache_types; ct++) {
+               cu_processor_id = gpu_processor_id;
+               if (pcache_info[ct].cache_level == 1) {
+                       /* k advances by num_cu_shared, so a zero stride would
+                        * never terminate the innermost loop
+                        */
+                       if (!pcache_info[ct].num_cu_shared) {
+                               pr_warn("Skipping GPU cache type %d: num_cu_shared is 0\n", ct);
+                               continue;
+                       }
+
+                       for (i = 0; i < pcu_info->num_shader_engines; i++) {
+                               for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
+                                       for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+
+                                               ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
+                                                                               pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
+                                                                               cu_processor_id, k);
+
+                                               /* A break here would only leave the
+                                                * innermost loop and hide the error
+                                                */
+                                               if (ret < 0)
+                                                       return ret;
+
+                                               if (!ret) {
+                                                       num_of_entries++;
+                                                       list_add_tail(&props_ext->list, &dev->cache_props_ext);
+                                               }
+
+                                               /* Move to next CU block */
+                                               num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+                                                       pcu_info->num_cu_per_sh) ?
+                                                       pcache_info[ct].num_cu_shared :
+                                                       (pcu_info->num_cu_per_sh - k);
+                                               cu_processor_id += num_cu_shared;
+                                       }
+                               }
+                       }
+               } else {
+                       ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
+                                                               pcu_info, ct, cu_processor_id);
+
+                       if (ret < 0)
+                               return ret;
+
+                       if (!ret) {
+                               num_of_entries++;
+                               list_add_tail(&props_ext->list, &dev->cache_props_ext);
+                       }
+               }
+       }
+       pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
+       return 0;
+}
+
 int kfd_topology_add_device(struct kfd_dev *gpu)
 {
        uint32_t gpu_id;
@@ -1759,6 +2986,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
                        topology_crat_proximity_domain--;
                        return res;
                }
+
                res = kfd_parse_crat_table(crat_image,
                                           &temp_topology_device_list,
                                           proximity_domain);
@@ -1771,23 +2999,27 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 
                kfd_topology_update_device_list(&temp_topology_device_list,
                        &topology_device_list);
+               up_write(&topology_lock);
+
+               dev = kfd_assign_gpu(gpu);
+               if (WARN_ON(!dev)) {
+                       res = -ENODEV;
+                       goto err;
+               }
+
+               down_write(&topology_lock);
+               kfd_fill_cache_non_crat_info(dev, gpu);
 
                /* Update the SYSFS tree, since we added another topology
                 * device
                 */
                res = kfd_topology_update_sysfs();
                up_write(&topology_lock);
-
                if (!res)
                        sys_props.generation_count++;
                else
                        pr_err("Failed to update GPU (ID: 0x%x) to sysfs 
topology. res=%d\n",
                                                gpu_id, res);
-               dev = kfd_assign_gpu(gpu);
-               if (WARN_ON(!dev)) {
-                       res = -ENODEV;
-                       goto err;
-               }
        }
 
        dev->gpu_id = gpu_id;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index dc4e239c8f8f..fc35fe9fa914 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -103,6 +103,25 @@ struct kfd_cache_properties {
        struct attribute        attr;
 };
 
+#define VCRAT_SIBLINGMAP_SIZE 64 /* bytes in kfd_cache_properties_ext.sibling_map */
+
+/* Cache properties for GPUs with more CUs than a 32-entry sibling map holds */
+struct kfd_cache_properties_ext {
+       struct list_head        list; /* node on kfd_topology_device.cache_props_ext */
+       uint32_t                processor_id_low; /* processor id of first active CU covered */
+       uint32_t                cache_level; /* copied from kfd_gpu_cache_info.cache_level */
+       uint32_t                cache_size; /* copied from kfd_gpu_cache_info.cache_size */
+       uint32_t                cacheline_size;
+       uint32_t                cachelines_per_tag;
+       uint32_t                cache_assoc;
+       uint32_t                cache_latency;
+       uint32_t                cache_type; /* bit mask of HSA_CACHE_TYPE_* values */
+       uint8_t                 sibling_map[VCRAT_SIBLINGMAP_SIZE]; /* CU mask relative to processor_id_low */
+       struct kfd_dev          *gpu;
+       struct kobject          *kobj;
+       struct attribute        attr;
+};
+
 struct kfd_iolink_properties {
        struct list_head        list;
        uint32_t                iolink_type;
@@ -139,6 +158,7 @@ struct kfd_topology_device {
        struct list_head                mem_props;
        uint32_t                        cache_count;
        struct list_head                cache_props;
+       struct list_head                cache_props_ext;
        struct list_head                io_link_props;
        struct list_head                p2p_link_props;
 #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
@@ -148,6 +168,7 @@ struct kfd_topology_device {
        struct kobject                  *kobj_node;
        struct kobject                  *kobj_mem;
        struct kobject                  *kobj_cache;
+       struct kobject                  *kobj_cache_ext;
        struct kobject                  *kobj_iolink;
        struct kobject                  *kobj_p2plink;
 #ifdef HAVE_AMD_IOMMU_PC_SUPPORTED
-- 
2.25.1


Reply via email to