Query to find out how many compute units on a GPU.
Useful for OpenCL usermode drivers.

Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
 drivers/gpu/drm/radeon/cik.c        | 12 +++++++++++-
 drivers/gpu/drm/radeon/evergreen.c  | 12 ++++++++++++
 drivers/gpu/drm/radeon/ni.c         | 12 ++++++++++++
 drivers/gpu/drm/radeon/r600.c       |  3 +++
 drivers/gpu/drm/radeon/radeon.h     |  6 ++++++
 drivers/gpu/drm/radeon/radeon_drv.c |  3 ++-
 drivers/gpu/drm/radeon/radeon_kms.c | 16 ++++++++++++++++
 drivers/gpu/drm/radeon/rv770.c      |  3 +++
 drivers/gpu/drm/radeon/si.c         | 11 ++++++++++-
 include/uapi/drm/radeon_drm.h       |  2 +-
 10 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index e4b2f2b..dcd4518 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -80,6 +80,7 @@ extern int sumo_rlc_init(struct radeon_device *rdev);
 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc 
*mc);
 extern void si_rlc_reset(struct radeon_device *rdev);
 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 
sh);
 extern int cik_sdma_resume(struct radeon_device *rdev);
 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 extern void cik_sdma_fini(struct radeon_device *rdev);
@@ -3257,7 +3258,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 hdp_host_path_cntl;
        u32 tmp;
-       int i, j;
+       int i, j, k;

        switch (rdev->family) {
        case CHIP_BONAIRE:
@@ -3446,6 +3447,15 @@ static void cik_gpu_init(struct radeon_device *rdev)
                     rdev->config.cik.max_sh_per_se,
                     rdev->config.cik.max_backends_per_se);

+       for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+                       for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
+                               rdev->config.cik.active_cus +=
+                                       
hweight32(cik_get_cu_active_bitmap(rdev, i, j));
+                       }
+               }
+       }
+
        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 653eff8..e2f6052 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3337,6 +3337,18 @@ static void evergreen_gpu_init(struct radeon_device 
*rdev)
                        disabled_rb_mask &= ~(1 << i);
        }

+       for (i = 0; i < rdev->config.evergreen.num_ses; i++) {
+               u32 simd_disable_bitmap;
+
+               WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 
0xffff0000) >> 16;
+               simd_disable_bitmap |= 0xffffffff << 
rdev->config.evergreen.max_simds;
+               tmp <<= 16;
+               tmp |= simd_disable_bitmap;
+       }
+       rdev->config.evergreen.active_simds = hweight32(~tmp);
+
        WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
        WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index c0fd8f6..5a33ca6 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1057,6 +1057,18 @@ static void cayman_gpu_init(struct radeon_device *rdev)
                        disabled_rb_mask &= ~(1 << i);
        }

+       for (i = 0; i < rdev->config.cayman.max_shader_engines; i++) {
+               u32 simd_disable_bitmap;
+
+               WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 
0xffff0000) >> 16;
+               simd_disable_bitmap |= 0xffffffff << 
rdev->config.cayman.max_simds_per_se;
+               tmp <<= 16;
+               tmp |= simd_disable_bitmap;
+       }
+       rdev->config.cayman.active_simds = hweight32(~tmp);
+
        WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
        WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index c2ff17c..c66952d 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1958,6 +1958,9 @@ static void r600_gpu_init(struct radeon_device *rdev)
        if (tmp < rdev->config.r600.max_simds) {
                rdev->config.r600.max_simds = tmp;
        }
+       tmp = rdev->config.r600.max_simds -
+               r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & 
R6XX_MAX_SIMDS_MASK);
+       rdev->config.r600.active_simds = tmp;

        disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & 
R6XX_MAX_BACKENDS_MASK;
        tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index dd77111..4b0bbf8 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1932,6 +1932,7 @@ struct r600_asic {
        unsigned                tiling_group_size;
        unsigned                tile_config;
        unsigned                backend_map;
+       unsigned                active_simds;
 };

 struct rv770_asic {
@@ -1957,6 +1958,7 @@ struct rv770_asic {
        unsigned                tiling_group_size;
        unsigned                tile_config;
        unsigned                backend_map;
+       unsigned                active_simds;
 };

 struct evergreen_asic {
@@ -1983,6 +1985,7 @@ struct evergreen_asic {
        unsigned tiling_group_size;
        unsigned tile_config;
        unsigned backend_map;
+       unsigned active_simds;
 };

 struct cayman_asic {
@@ -2021,6 +2024,7 @@ struct cayman_asic {
        unsigned multi_gpu_tile_size;

        unsigned tile_config;
+       unsigned active_simds;
 };

 struct si_asic {
@@ -2051,6 +2055,7 @@ struct si_asic {

        unsigned tile_config;
        uint32_t tile_mode_array[32];
+       uint32_t active_cus;
 };

 struct cik_asic {
@@ -2082,6 +2087,7 @@ struct cik_asic {
        unsigned tile_config;
        uint32_t tile_mode_array[32];
        uint32_t macrotile_mode_array[16];
+       uint32_t active_cus;
 };

 union radeon_asic_config {
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
b/drivers/gpu/drm/radeon/radeon_drv.c
index b7a2ec2..6e30174 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -81,9 +81,10 @@
  *   2.37.0 - allow GS ring setup on r6xx/r7xx
  *   2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
+ *   2.39.0 - Add INFO query for number of active CUs
  */
 #define KMS_DRIVER_MAJOR       2
-#define KMS_DRIVER_MINOR       38
+#define KMS_DRIVER_MINOR       39
 #define KMS_DRIVER_PATCHLEVEL  0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c 
b/drivers/gpu/drm/radeon/radeon_kms.c
index eaaedba..5cd70f9 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -513,6 +513,22 @@ static int radeon_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file
                value_size = sizeof(uint64_t);
                value64 = atomic64_read(&rdev->gtt_usage);
                break;
+       case RADEON_INFO_ACTIVE_CU_COUNT:
+               if (rdev->family >= CHIP_BONAIRE)
+                       *value = rdev->config.cik.active_cus;
+               else if (rdev->family >= CHIP_TAHITI)
+                       *value = rdev->config.si.active_cus;
+               else if (rdev->family >= CHIP_CAYMAN)
+                       *value = rdev->config.cayman.active_simds;
+               else if (rdev->family >= CHIP_CEDAR)
+                       *value = rdev->config.evergreen.active_simds;
+               else if (rdev->family >= CHIP_RV770)
+                       *value = rdev->config.rv770.active_simds;
+               else if (rdev->family >= CHIP_R600)
+                       *value = rdev->config.r600.active_simds;
+               else
+                       *value = 1;
+               break;
        default:
                DRM_DEBUG_KMS("Invalid request %d\n", info->request);
                return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 97b7766..da8703d 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1327,6 +1327,9 @@ static void rv770_gpu_init(struct radeon_device *rdev)
        if (tmp < rdev->config.rv770.max_simds) {
                rdev->config.rv770.max_simds = tmp;
        }
+       tmp = rdev->config.rv770.max_simds -
+               r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & 
R7XX_MAX_SIMDS_MASK);
+       rdev->config.rv770.active_simds = tmp;

        switch (rdev->config.rv770.max_tile_pipes) {
        case 1:
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index ec13e8d..730cee2 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -71,6 +71,7 @@ MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

+static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
 static void si_program_aspm(struct radeon_device *rdev);
 extern void sumo_rlc_fini(struct radeon_device *rdev);
@@ -2900,7 +2901,7 @@ static void si_gpu_init(struct radeon_device *rdev)
        u32 sx_debug_1;
        u32 hdp_host_path_cntl;
        u32 tmp;
-       int i, j;
+       int i, j, k;

        switch (rdev->family) {
        case CHIP_TAHITI:
@@ -3098,6 +3099,14 @@ static void si_gpu_init(struct radeon_device *rdev)
                     rdev->config.si.max_sh_per_se,
                     rdev->config.si.max_cu_per_sh);

+       for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
+                       for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
+                               rdev->config.si.active_cus +=
+                                       hweight32(si_get_cu_active_bitmap(rdev, 
i, j));
+                       }
+               }
+       }

        /* set HW defaults for 3D engine */
        WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index aefa2f6..1cc0b61 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -1007,7 +1007,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_NUM_BYTES_MOVED    0x1d
 #define RADEON_INFO_VRAM_USAGE         0x1e
 #define RADEON_INFO_GTT_USAGE          0x1f
-
+#define RADEON_INFO_ACTIVE_CU_COUNT    0x20

 struct drm_radeon_info {
        uint32_t                request;
-- 
1.8.3.1

Reply via email to