On Sat, May 31, 2014 at 7:13 AM, Bruno Jimenez <brunoji...@gmail.com> wrote:
> On Fri, 2014-05-30 at 19:33 -0400, Alex Deucher wrote:
>> On Fri, May 30, 2014 at 11:31 AM, Bruno Jiménez <brunoji...@gmail.com> wrote:
>> > The data has been extracted from:
>> > AMD Accelerated Parallel Processing OpenCL Programming Guide (rev 2.7)
>> > Appendix D: Device Parameters
>>
>> You should add a query for the number of compute units to the
>> RADEON_INFO ioctl and then just ask the kernel how many CUs/SIMDs the
>> hw has.  This will properly handle all boards (harvest, etc.) since we
>> can read the actual number of CUs off the GPU.
>>
>> Alex
>
> Hi,
>
> At first I tried to do so (as for the maximum clock frequency), but I
> couldn't find how to query that value, nor many docs about what I could
> ask the kernel for.
>
> I think I have now found the appropriate docs, and I will try again to
> query the kernel later.

You'd need to add a new query.  It doesn't look like we expose this
yet.  The attached untested patch should mostly do the trick.

Alex


>
> Sorry for any inconvenience.
> Bruno
>
>>
>> > ---
>> >  src/gallium/drivers/radeon/r600_pipe_common.c | 90 
>> > +++++++++++++++++++++++++++
>> >  1 file changed, 90 insertions(+)
>> >
>> > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
>> > b/src/gallium/drivers/radeon/r600_pipe_common.c
>> > index 70c4d1a..c4abacd 100644
>> > --- a/src/gallium/drivers/radeon/r600_pipe_common.c
>> > +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
>> > @@ -422,6 +422,89 @@ const char *r600_get_llvm_processor_name(enum 
>> > radeon_family family)
>> >         }
>> >  }
>> >
>> > +static uint32_t radeon_max_compute_units(enum radeon_family family)
>> > +{
>> > +       switch (family) {
>> > +       case CHIP_CEDAR:
>> > +               return 2;
>> > +
>> > +       /* Redwood PRO2: 4
>> > +        * Redwood PRO:  5
>> > +        * Redwood XT:   5 */
>> > +       case CHIP_REDWOOD:
>> > +               return 4;
>> > +
>> > +       /* Juniper LE:  9
>> > +        * Juniper XT: 10 */
>> > +       case CHIP_JUNIPER:
>> > +               return 9;
>> > +
>> > +       /* Cypress LE:  14
>> > +        * Cypress PRO: 18
>> > +        * Cypress XT:  20 */
>> > +       case CHIP_CYPRESS:
>> > +               return 14;
>> > +
>> > +       case CHIP_HEMLOCK:
>> > +               return 40;
>> > +
>> > +       /* XXX: is Zacate really equal to Ontario?
>> > +        * Zacate E-350: 2
>> > +        * Zacate E-240: 2
>> > +        * Ontario C-50: 2
>> > +        * Ontario C-30: 2 */
>> > +       case CHIP_PALM:
>> > +               return 2;
>> > +
>> > +       /* Caicos:      2
>> > +        * Seymour LP:  2
>> > +        * Seymour PRO: 2
>> > +        * Seymour XT:  2
>> > +        * Seymour XTX: 2 */
>> > +       case CHIP_CAICOS:
>> > +               return 2;
>> > +
>> > +       /* Turks PRO:    6
>> > +        * Turks XT:     6
>> > +        * Whistler LP:  6
>> > +        * Whistler PRO: 6
>> > +        * Whistler XT:  6 */
>> > +       case CHIP_TURKS:
>> > +               return 6;
>> > +
>> > +       /* Barts LE:      10
>> > +        * Barts PRO:     12
>> > +        * Barts XT:      14
>> > +        * Blackcomb PRO: 12 */
>> > +       case CHIP_BARTS:
>> > +               return 10;
>> > +
>> > +       /* Cayman PRO: 22
>> > +        * Cayman XT:  24
>> > +        * Cayman Gemini: 48 */
>> > +       case CHIP_CAYMAN:
>> > +               return 22;
>> > +
>> > +       /* Verde PRO:  8
>> > +        * Verde XT:  10 */
>> > +       case CHIP_VERDE:
>> > +               return 8;
>> > +
>> > +       /* Pitcairn PRO: 16
>> > +        * Pitcairn XT:  20 */
>> > +       case CHIP_PITCAIRN:
>> > +               return 16;
>> > +
>> > +       /* Tahiti PRO: 28
>> > +        * Tahiti XT:  32 */
>> > +       case CHIP_TAHITI:
>> > +               return 28;
>> > +
>> > +       default:
>> > +               return 1;
>> > +       }
>> > +}
>> > +
>> >  static int r600_get_compute_param(struct pipe_screen *screen,
>> >          enum pipe_compute_cap param,
>> >          void *ret)
>> > @@ -519,6 +602,13 @@ static int r600_get_compute_param(struct pipe_screen 
>> > *screen,
>> >                 }
>> >                 return sizeof(uint32_t);
>> >
>> > +       case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
>> > +               if (ret) {
>> > +                       uint32_t *max_compute_units = ret;
>> > +                       *max_compute_units = 
>> > radeon_max_compute_units(rscreen->family);
>> > +               }
>> > +               return sizeof(uint32_t);
>> > +
>> >         default:
>> >                 fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
>> >                 return 0;
>> > --
>> > 1.9.3
>> >
>> > _______________________________________________
>> > mesa-dev mailing list
>> > mesa-dev@lists.freedesktop.org
>> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
From 7342a3351328bc0fcb9fbd3588bf28b3f20fa4e1 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deuc...@amd.com>
Date: Mon, 2 Jun 2014 16:13:21 -0400
Subject: [PATCH] drm/radeon: add query for number of active CUs

Add a query to find out how many compute units are active on a GPU.
Useful for OpenCL usermode drivers.

Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
---
 drivers/gpu/drm/radeon/cik.c        | 12 +++++++++++-
 drivers/gpu/drm/radeon/evergreen.c  | 12 ++++++++++++
 drivers/gpu/drm/radeon/ni.c         | 12 ++++++++++++
 drivers/gpu/drm/radeon/r600.c       |  3 +++
 drivers/gpu/drm/radeon/radeon.h     |  6 ++++++
 drivers/gpu/drm/radeon/radeon_drv.c |  3 ++-
 drivers/gpu/drm/radeon/radeon_kms.c | 16 ++++++++++++++++
 drivers/gpu/drm/radeon/rv770.c      |  3 +++
 drivers/gpu/drm/radeon/si.c         | 11 ++++++++++-
 include/uapi/drm/radeon_drm.h       |  2 +-
 10 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index a518140..54c425f 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -74,6 +74,7 @@ extern int sumo_rlc_init(struct radeon_device *rdev);
 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 extern void si_rlc_reset(struct radeon_device *rdev);
 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 extern int cik_sdma_resume(struct radeon_device *rdev);
 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 extern void cik_sdma_fini(struct radeon_device *rdev);
@@ -3191,7 +3192,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
 	u32 mc_shared_chmap, mc_arb_ramcfg;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
-	int i, j;
+	int i, j, k;
 
 	switch (rdev->family) {
 	case CHIP_BONAIRE:
@@ -3379,6 +3380,15 @@ static void cik_gpu_init(struct radeon_device *rdev)
 		     rdev->config.cik.max_sh_per_se,
 		     rdev->config.cik.max_backends_per_se);
 
+	/* Each bitmap has one bit per active CU in that SE/SH; k is the total. */
+	k = 0;
+	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++)
+			k += hweight32(cik_get_cu_active_bitmap(rdev, i, j));
+	}
+	/* Assign, don't accumulate, so the count stays right if init re-runs. */
+	rdev->config.cik.active_cus = k;
+
 	/* set HW defaults for 3D engine */
 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
 
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 0318230..798afb4 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3337,6 +3337,18 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 			disabled_rb_mask &= ~(1 << i);
 	}
 
+	for (i = 0; i < rdev->config.evergreen.num_ses; i++) {
+		u32 simd_disable_bitmap;
+
+		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+		simd_disable_bitmap |= 0xffffffff << rdev->config.evergreen.max_simds;
+		tmp <<= 16;
+		tmp |= simd_disable_bitmap;
+	}
+	rdev->config.evergreen.active_simds = hweight32(~tmp);
+
 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 1d3209f..abef068 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1057,6 +1057,18 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 			disabled_rb_mask &= ~(1 << i);
 	}
 
+	for (i = 0; i < rdev->config.cayman.max_shader_engines; i++) {
+		u32 simd_disable_bitmap;
+
+		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+		simd_disable_bitmap |= 0xffffffff << rdev->config.cayman.max_simds_per_se;
+		tmp <<= 16;
+		tmp |= simd_disable_bitmap;
+	}
+	rdev->config.cayman.active_simds = hweight32(~tmp);
+
 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 436e550..8b01d68a 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1958,6 +1958,9 @@ static void r600_gpu_init(struct radeon_device *rdev)
 	if (tmp < rdev->config.r600.max_simds) {
 		rdev->config.r600.max_simds = tmp;
 	}
+	tmp = rdev->config.r600.max_simds -
+		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK);
+	rdev->config.r600.active_simds = tmp;
 
 	disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK;
 	tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index dd4da88..5ea179e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1927,6 +1927,7 @@ struct r600_asic {
 	unsigned		tiling_group_size;
 	unsigned		tile_config;
 	unsigned		backend_map;
+	unsigned		active_simds;
 };
 
 struct rv770_asic {
@@ -1952,6 +1953,7 @@ struct rv770_asic {
 	unsigned		tiling_group_size;
 	unsigned		tile_config;
 	unsigned		backend_map;
+	unsigned		active_simds;
 };
 
 struct evergreen_asic {
@@ -1978,6 +1980,7 @@ struct evergreen_asic {
 	unsigned tiling_group_size;
 	unsigned tile_config;
 	unsigned backend_map;
+	unsigned active_simds;
 };
 
 struct cayman_asic {
@@ -2016,6 +2019,7 @@ struct cayman_asic {
 	unsigned multi_gpu_tile_size;
 
 	unsigned tile_config;
+	unsigned active_simds;
 };
 
 struct si_asic {
@@ -2046,6 +2050,7 @@ struct si_asic {
 
 	unsigned tile_config;
 	uint32_t tile_mode_array[32];
+	uint32_t active_cus;
 };
 
 struct cik_asic {
@@ -2077,6 +2082,7 @@ struct cik_asic {
 	unsigned tile_config;
 	uint32_t tile_mode_array[32];
 	uint32_t macrotile_mode_array[16];
+	uint32_t active_cus;
 };
 
 union radeon_asic_config {
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 15447a41..2f18d74 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -81,9 +81,10 @@
  *   2.37.0 - allow GS ring setup on r6xx/r7xx
  *   2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
+ *   2.39.0 - Add INFO query for number of active CUs
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	38
+#define KMS_DRIVER_MINOR	39
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 0cc47f1..f3fe789 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -513,6 +513,22 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		value_size = sizeof(uint64_t);
 		value64 = atomic64_read(&rdev->gtt_usage);
 		break;
+	case RADEON_INFO_ACTIVE_CU_COUNT:
+		if (rdev->family >= CHIP_BONAIRE)
+			*value = rdev->config.cik.active_cus;
+		else if (rdev->family >= CHIP_TAHITI)
+			*value = rdev->config.si.active_cus;
+		else if (rdev->family >= CHIP_CAYMAN)
+			*value = rdev->config.cayman.active_simds;
+		else if (rdev->family >= CHIP_CEDAR)
+			*value = rdev->config.evergreen.active_simds;
+		else if (rdev->family >= CHIP_RV770)
+			*value = rdev->config.rv770.active_simds;
+		else if (rdev->family >= CHIP_R600)
+			*value = rdev->config.r600.active_simds;
+		else
+			*value = 1;
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 97b7766..da8703d 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1327,6 +1327,9 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 	if (tmp < rdev->config.rv770.max_simds) {
 		rdev->config.rv770.max_simds = tmp;
 	}
+	tmp = rdev->config.rv770.max_simds -
+		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
+	rdev->config.rv770.active_simds = tmp;
 
 	switch (rdev->config.rv770.max_tile_pipes) {
 	case 1:
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 5c1c0c7..486682b 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -71,6 +71,7 @@ MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
 
+static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
 static void si_program_aspm(struct radeon_device *rdev);
 extern void sumo_rlc_fini(struct radeon_device *rdev);
@@ -2900,7 +2901,7 @@ static void si_gpu_init(struct radeon_device *rdev)
 	u32 sx_debug_1;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
-	int i, j;
+	int i, j, k;
 
 	switch (rdev->family) {
 	case CHIP_TAHITI:
@@ -3098,6 +3099,14 @@ static void si_gpu_init(struct radeon_device *rdev)
 		     rdev->config.si.max_sh_per_se,
 		     rdev->config.si.max_cu_per_sh);
 
+	/* Each bitmap has one bit per active CU in that SE/SH; k is the total. */
+	k = 0;
+	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
+		for (j = 0; j < rdev->config.si.max_sh_per_se; j++)
+			k += hweight32(si_get_cu_active_bitmap(rdev, i, j));
+	}
+	/* Assign, don't accumulate, so the count stays right if init re-runs. */
+	rdev->config.si.active_cus = k;
 
 	/* set HW defaults for 3D engine */
 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index aefa2f6..1cc0b61 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -1007,7 +1007,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_NUM_BYTES_MOVED	0x1d
 #define RADEON_INFO_VRAM_USAGE		0x1e
 #define RADEON_INFO_GTT_USAGE		0x1f
-
+#define RADEON_INFO_ACTIVE_CU_COUNT	0x20
 
 struct drm_radeon_info {
 	uint32_t		request;
-- 
1.8.3.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to