On 09/09/2016 05:29 AM, Marek Olšák wrote: > On Fri, Sep 9, 2016 at 10:12 AM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: >> From: Nicolai Hähnle <nicolai.haeh...@amd.com> >> >> Not sure if it's possible to avoid programming the block size twice (once for >> the userdata and once for the dispatch). >> >> Since the shaders are compiled with a pessimistic upper limit on the number >> of >> registers, asynchronously compiling variants may be worth considering in the >> future if we observe the shaders to be dispatched with small block sizes. >> --- >> I think this is sufficient to support variable group sizes on radeonsi, but >> it's completely untested. Do you keep the latest version of your series in a >> public repository somewhere? >> >> src/gallium/drivers/radeonsi/si_compute.c | 10 +++++++++- >> src/gallium/drivers/radeonsi/si_shader.c | 29 ++++++++++++++++++++--------- >> src/gallium/drivers/radeonsi/si_shader.h | 4 +++- >> 3 files changed, 32 insertions(+), 11 deletions(-) >> >> diff --git a/src/gallium/drivers/radeonsi/si_compute.c >> b/src/gallium/drivers/radeonsi/si_compute.c >> index 5041761..26e096c 100644 >> --- a/src/gallium/drivers/radeonsi/si_compute.c >> +++ b/src/gallium/drivers/radeonsi/si_compute.c >> @@ -379,25 +379,33 @@ static void si_setup_tgsi_grid(struct si_context *sctx, >> for (i = 0; i < 3; ++i) { >> radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); >> radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | >> COPY_DATA_DST_SEL(COPY_DATA_REG)); >> radeon_emit(cs, (va + 4 * i)); >> radeon_emit(cs, (va + 4 * i) >> 32); >> radeon_emit(cs, (grid_size_reg >> 2) + i); >> radeon_emit(cs, 0); >> } >> } else { >> + struct si_compute *program = sctx->cs_shader_state.program; >> + bool variable_group_size = >> + >> program->shader.selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] >> == 0; >> >> - radeon_set_sh_reg_seq(cs, grid_size_reg, 3); >> + radeon_set_sh_reg_seq(cs, grid_size_reg, variable_group_size >> ? 6 : 3); >> radeon_emit(cs, info->grid[0]); >> radeon_emit(cs, info->grid[1]); >> radeon_emit(cs, info->grid[2]); >> + if (variable_group_size) { >> + radeon_emit(cs, info->block[0]); >> + radeon_emit(cs, info->block[1]); >> + radeon_emit(cs, info->block[2]); >> + } >> } >> } >> >> static void si_emit_dispatch_packets(struct si_context *sctx, >> const struct pipe_grid_info *info) >> { >> struct radeon_winsys_cs *cs = sctx->b.gfx.cs; >> bool render_cond_bit = sctx->b.render_cond && >> !sctx->b.render_cond_force_off; >> unsigned waves_per_threadgroup = >> DIV_ROUND_UP(info->block[0] * info->block[1] * >> info->block[2], 64); >> diff --git a/src/gallium/drivers/radeonsi/si_shader.c >> b/src/gallium/drivers/radeonsi/si_shader.c >> index 0b7de18..730ee21 100644 >> --- a/src/gallium/drivers/radeonsi/si_shader.c >> +++ b/src/gallium/drivers/radeonsi/si_shader.c >> @@ -1783,30 +1783,35 @@ static void declare_system_value( >> >> case TGSI_SEMANTIC_GRID_SIZE: >> value = LLVMGetParam(radeon_bld->main_fn, >> SI_PARAM_GRID_SIZE); >> break; >> >> case TGSI_SEMANTIC_BLOCK_SIZE: >> { >> LLVMValueRef values[3]; >> unsigned i; >> unsigned *properties = >> ctx->shader->selector->info.properties; >> - unsigned sizes[3] = { >> - properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH], >> - properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT], >> - properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] >> - }; >> >> - for (i = 0; i < 3; ++i) >> - values[i] = lp_build_const_int32(gallivm, sizes[i]); >> + if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) { >> + unsigned sizes[3] = { >> + >> properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH], >> + >> properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT], >> + >> properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] >> + }; >> + >> + for (i = 0; i < 3; ++i) >> + values[i] = lp_build_const_int32(gallivm, >> sizes[i]); >> >> - value = lp_build_gather_values(gallivm, values, 3); >> + value = lp_build_gather_values(gallivm, values, 3); >> + } else { >> + value = LLVMGetParam(radeon_bld->main_fn, >> SI_PARAM_BLOCK_SIZE); >> + } >> break; >> } >> >> case TGSI_SEMANTIC_BLOCK_ID: >> value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_BLOCK_ID); >> break; >> >> case TGSI_SEMANTIC_THREAD_ID: >> value = LLVMGetParam(radeon_bld->main_fn, >> SI_PARAM_THREAD_ID); >> break; >> @@ -5705,20 +5710,21 @@ static void create_function(struct si_shader_context >> *ctx) >> >> for (i = 0; i < num_return_sgprs; i++) >> returns[i] = ctx->i32; >> for (; i < num_returns; i++) >> returns[i] = ctx->f32; >> } >> break; >> >> case PIPE_SHADER_COMPUTE: >> params[SI_PARAM_GRID_SIZE] = v3i32; >> + params[SI_PARAM_BLOCK_SIZE] = v3i32; >> params[SI_PARAM_BLOCK_ID] = v3i32; >> last_sgpr = SI_PARAM_BLOCK_ID; >> >> params[SI_PARAM_THREAD_ID] = v3i32; >> num_params = SI_PARAM_THREAD_ID + 1; >> break; >> default: >> assert(0 && "unimplemented shader"); >> return; >> } >> @@ -5741,21 +5747,26 @@ static void create_function(struct si_shader_context >> *ctx) >> S_0286D0_LINEAR_CENTROID_ENA(1) | >> S_0286D0_FRONT_FACE_ENA(1) | >> S_0286D0_POS_FIXED_PT_ENA(1)); >> } else if (ctx->type == PIPE_SHADER_COMPUTE) { >> const unsigned *properties = >> shader->selector->info.properties; >> unsigned max_work_group_size = >> >> properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] * >> >> properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] * >> >> properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]; >> >> - assert(max_work_group_size); >> + if (!max_work_group_size) { >> + /* This is a variable group size compute shader, >> + * compile it for the maximum possible group size. >> + */ >> + max_work_group_size = 2048; >> + } > > The VGPR limit is 32. That's too limiting. I'd prefer it if > GL_MAX_COMPUTE_VARIABLE_WORK_GROUP_INVOCATIONS_ARB was set to 512 or > 1024. That should move the limit to 128 or 64, respectively.
The minimum allowed by the spec is 512, so it seems likely that most applications will only expect that limit. > Marek > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev