On Sat, May 31, 2014 at 7:13 AM, Bruno Jimenez <brunoji...@gmail.com> wrote: > On Fri, 2014-05-30 at 19:33 -0400, Alex Deucher wrote: >> On Fri, May 30, 2014 at 11:31 AM, Bruno Jiménez <brunoji...@gmail.com> wrote: >> > The data has been extracted from: >> > AMD Accelerated Parallel Processing OpenCL Programming Guide (rev 2.7) >> > Appendix D: Device Parameters >> >> You should add a query for the number of compute units to the >> RADEON_INFO ioctl and then just ask the kernel how many CUs/SIMDs the >> hw has. This will properly handle all boards (harvest, etc.) since we >> can read the actual number of CUs off the GPU. >> >> Alex > > Hi, > > At first I tried to do so (as for the maximum clock frequency), but I > couldn't find how to query that value, nor many docs about what I could > ask the kernel for. > > I think I have now found the appropriate docs, and I will try again to > query the kernel later.
You'd need to add a new query. It doesn't look like we expose this yet. The attached untested patch should mostly do the trick. Alex > > Sorry for any inconvenience. > Bruno > >> >> > --- >> > src/gallium/drivers/radeon/r600_pipe_common.c | 90 >> > +++++++++++++++++++++++++++ >> > 1 file changed, 90 insertions(+) >> > >> > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c >> > b/src/gallium/drivers/radeon/r600_pipe_common.c >> > index 70c4d1a..c4abacd 100644 >> > --- a/src/gallium/drivers/radeon/r600_pipe_common.c >> > +++ b/src/gallium/drivers/radeon/r600_pipe_common.c >> > @@ -422,6 +422,89 @@ const char *r600_get_llvm_processor_name(enum >> > radeon_family family) >> > } >> > } >> > >> > +static uint32_t radeon_max_compute_units(enum radeon_family family) >> > +{ >> > + switch (family) { >> > + case CHIP_CEDAR: >> > + return 2; >> > + >> > + /* Redwood PRO2: 4 >> > + * Redwood PRO: 5 >> > + * Redwood XT: 5 */ >> > + case CHIP_REDWOOD: >> > + return 4; >> > + >> > + /* Juniper LE: 9 >> > + * Juniper XT: 10 */ >> > + case CHIP_JUNIPER: >> > + return 9; >> > + >> > + /* Cypress LE: 14 >> > + * Cypress PRO: 18 >> > + * Cypress XT: 20 */ >> > + case CHIP_CYPRESS: >> > + return 14; >> > + >> > + case CHIP_HEMLOCK: >> > + return 40; >> > + >> > + /* XXX: is Zacate really equal to Ontario? 
>> > + * Zacate E-350: 2 >> > + * Zacate E-240: 2 >> > + * Ontario C-50: 2 >> > + * Ontario C-30: 2 */ >> > + case CHIP_PALM: >> > + return 2; >> > + >> > + /* Caicos: 2 >> > + * Seymour LP: 2 >> > + * Seymour PRO: 2 >> > + * Seymour XT: 2 >> > + * Seymour XTX: 2 */ >> > + case CHIP_CAICOS: >> > + return 2; >> > + >> > + /* Turks PRO: 6 >> > + * Turks XT: 6 >> > + * Whistler LP: 6 >> > + * Whistler PRO: 6 >> > + * Whistler XT: 6 */ >> > + case CHIP_TURKS: >> > + return 6; >> > + >> > + /* Barts LE: 10 >> > + * Barts PRO: 12 >> > + * Barts XT: 14 >> > + * Blackcomb PRO: 12 */ >> > + case CHIP_BARTS: >> > + return 10; >> > + >> > + /* Cayman PRO: 22 >> > + * Cayman XT: 24 >> > + * Cayman Gemini: 48 */ >> > + case CHIP_CAYMAN: >> > + return 22; >> > + >> > + /* Verde PRO: 8 >> > + * Verde XT: 10 */ >> > + case CHIP_VERDE: >> > + return 8; >> > + >> > + /* Pitcairn PRO: 16 >> > + * Pitcairn XT: 20 */ >> > + case CHIP_PITCAIRN: >> > + return 16; >> > + >> > + /* Tahiti PRO: 28 >> > + * Tahiti XT: 32 */ >> > + case CHIP_TAHITI: >> > + return 28; >> > + >> > + default: >> > + return 1; >> > + } >> > +} >> > + >> > static int r600_get_compute_param(struct pipe_screen *screen, >> > enum pipe_compute_cap param, >> > void *ret) >> > @@ -519,6 +602,13 @@ static int r600_get_compute_param(struct pipe_screen >> > *screen, >> > } >> > return sizeof(uint32_t); >> > >> > + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: >> > + if (ret) { >> > + uint32_t *max_compute_units = ret; >> > + *max_compute_units = >> > radeon_max_compute_units(rscreen->family); >> > + } >> > + return sizeof(uint32_t); >> > + >> > default: >> > fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); >> > return 0; >> > -- >> > 1.9.3 >> > >> > _______________________________________________ >> > mesa-dev mailing list >> > mesa-dev@lists.freedesktop.org >> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > >
From 7342a3351328bc0fcb9fbd3588bf28b3f20fa4e1 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deuc...@amd.com> Date: Mon, 2 Jun 2014 16:13:21 -0400 Subject: [PATCH] drm/radeon: add query for number of active CUs Query to find out how many compute units on a GPU. Useful for OpenCL usermode drivers. Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> --- drivers/gpu/drm/radeon/cik.c | 12 +++++++++++- drivers/gpu/drm/radeon/evergreen.c | 12 ++++++++++++ drivers/gpu/drm/radeon/ni.c | 12 ++++++++++++ drivers/gpu/drm/radeon/r600.c | 3 +++ drivers/gpu/drm/radeon/radeon.h | 6 ++++++ drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- drivers/gpu/drm/radeon/radeon_kms.c | 16 ++++++++++++++++ drivers/gpu/drm/radeon/rv770.c | 3 +++ drivers/gpu/drm/radeon/si.c | 11 ++++++++++- include/uapi/drm/radeon_drm.h | 2 +- 10 files changed, 76 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a518140..54c425f 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -74,6 +74,7 @@ extern int sumo_rlc_init(struct radeon_device *rdev); extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); extern void si_rlc_reset(struct radeon_device *rdev); extern void si_init_uvd_internal_cg(struct radeon_device *rdev); +static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh); extern int cik_sdma_resume(struct radeon_device *rdev); extern void cik_sdma_enable(struct radeon_device *rdev, bool enable); extern void cik_sdma_fini(struct radeon_device *rdev); @@ -3191,7 +3192,7 @@ static void cik_gpu_init(struct radeon_device *rdev) u32 mc_shared_chmap, mc_arb_ramcfg; u32 hdp_host_path_cntl; u32 tmp; - int i, j; + int i, j, k; switch (rdev->family) { case CHIP_BONAIRE: @@ -3379,6 +3380,15 @@ static void cik_gpu_init(struct radeon_device *rdev) rdev->config.cik.max_sh_per_se, rdev->config.cik.max_backends_per_se); + for (i = 0; i < rdev->config.cik.max_shader_engines; i++) 
{ + for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { + for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) { + rdev->config.cik.active_cus += + hweight32(cik_get_cu_active_bitmap(rdev, i, j)); + } + } + } + /* set HW defaults for 3D engine */ WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 0318230..798afb4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3337,6 +3337,18 @@ static void evergreen_gpu_init(struct radeon_device *rdev) disabled_rb_mask &= ~(1 << i); } + for (i = 0; i < rdev->config.evergreen.num_ses; i++) { + u32 simd_disable_bitmap; + + WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i)); + WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i)); + simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16; + simd_disable_bitmap |= 0xffffffff << rdev->config.evergreen.max_simds; + tmp <<= 16; + tmp |= simd_disable_bitmap; + } + rdev->config.evergreen.active_simds = hweight32(~tmp); + WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 1d3209f..abef068 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1057,6 +1057,18 @@ static void cayman_gpu_init(struct radeon_device *rdev) disabled_rb_mask &= ~(1 << i); } + for (i = 0; i < rdev->config.cayman.max_shader_engines; i++) { + u32 simd_disable_bitmap; + + WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i)); + WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i)); + simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16; + simd_disable_bitmap |= 0xffffffff << rdev->config.cayman.max_simds_per_se; + tmp <<= 16; + tmp |= simd_disable_bitmap; + } + rdev->config.cayman.active_simds = 
hweight32(~tmp); + WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 436e550..8b01d68a 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1958,6 +1958,9 @@ static void r600_gpu_init(struct radeon_device *rdev) if (tmp < rdev->config.r600.max_simds) { rdev->config.r600.max_simds = tmp; } + tmp = rdev->config.r600.max_simds - + r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK); + rdev->config.r600.active_simds = tmp; disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK; tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index dd4da88..5ea179e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1927,6 +1927,7 @@ struct r600_asic { unsigned tiling_group_size; unsigned tile_config; unsigned backend_map; + unsigned active_simds; }; struct rv770_asic { @@ -1952,6 +1953,7 @@ struct rv770_asic { unsigned tiling_group_size; unsigned tile_config; unsigned backend_map; + unsigned active_simds; }; struct evergreen_asic { @@ -1978,6 +1980,7 @@ struct evergreen_asic { unsigned tiling_group_size; unsigned tile_config; unsigned backend_map; + unsigned active_simds; }; struct cayman_asic { @@ -2016,6 +2019,7 @@ struct cayman_asic { unsigned multi_gpu_tile_size; unsigned tile_config; + unsigned active_simds; }; struct si_asic { @@ -2046,6 +2050,7 @@ struct si_asic { unsigned tile_config; uint32_t tile_mode_array[32]; + uint32_t active_cus; }; struct cik_asic { @@ -2077,6 +2082,7 @@ struct cik_asic { unsigned tile_config; uint32_t tile_mode_array[32]; uint32_t macrotile_mode_array[16]; + uint32_t active_cus; }; union radeon_asic_config { diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
b/drivers/gpu/drm/radeon/radeon_drv.c index 15447a41..2f18d74 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -81,9 +81,10 @@ * 2.37.0 - allow GS ring setup on r6xx/r7xx * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN), * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG + * 2.39.0 - Add INFO query for number of active CUs */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 38 +#define KMS_DRIVER_MINOR 39 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 0cc47f1..f3fe789 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -513,6 +513,22 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file value_size = sizeof(uint64_t); value64 = atomic64_read(&rdev->gtt_usage); break; + case RADEON_INFO_ACTIVE_CU_COUNT: + if (rdev->family >= CHIP_BONAIRE) + *value = rdev->config.cik.active_cus; + else if (rdev->family >= CHIP_TAHITI) + *value = rdev->config.si.active_cus; + else if (rdev->family >= CHIP_CAYMAN) + *value = rdev->config.cayman.active_simds; + else if (rdev->family >= CHIP_CEDAR) + *value = rdev->config.evergreen.active_simds; + else if (rdev->family >= CHIP_RV770) + *value = rdev->config.rv770.active_simds; + else if (rdev->family >= CHIP_R600) + *value = rdev->config.r600.active_simds; + else + *value = 1; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 97b7766..da8703d 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1327,6 +1327,9 @@ static void rv770_gpu_init(struct radeon_device *rdev) if (tmp < rdev->config.rv770.max_simds) { rdev->config.rv770.max_simds = tmp; 
} + tmp = rdev->config.rv770.max_simds - + r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK); + rdev->config.rv770.active_simds = tmp; switch (rdev->config.rv770.max_tile_pipes) { case 1: diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 5c1c0c7..486682b 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -71,6 +71,7 @@ MODULE_FIRMWARE("radeon/HAINAN_mc2.bin"); MODULE_FIRMWARE("radeon/HAINAN_rlc.bin"); MODULE_FIRMWARE("radeon/HAINAN_smc.bin"); +static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh); static void si_pcie_gen3_enable(struct radeon_device *rdev); static void si_program_aspm(struct radeon_device *rdev); extern void sumo_rlc_fini(struct radeon_device *rdev); @@ -2900,7 +2901,7 @@ static void si_gpu_init(struct radeon_device *rdev) u32 sx_debug_1; u32 hdp_host_path_cntl; u32 tmp; - int i, j; + int i, j, k; switch (rdev->family) { case CHIP_TAHITI: @@ -3098,6 +3099,14 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.max_sh_per_se, rdev->config.si.max_cu_per_sh); + for (i = 0; i < rdev->config.si.max_shader_engines; i++) { + for (j = 0; j < rdev->config.si.max_sh_per_se; j++) { + for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) { + rdev->config.si.active_cus += + hweight32(si_get_cu_active_bitmap(rdev, i, j)); + } + } + } /* set HW defaults for 3D engine */ WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index aefa2f6..1cc0b61 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -1007,7 +1007,7 @@ struct drm_radeon_cs { #define RADEON_INFO_NUM_BYTES_MOVED 0x1d #define RADEON_INFO_VRAM_USAGE 0x1e #define RADEON_INFO_GTT_USAGE 0x1f - +#define RADEON_INFO_ACTIVE_CU_COUNT 0x20 struct drm_radeon_info { uint32_t request; -- 1.8.3.1
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev