On 09/10/2016 10:02 AM, Bas Nieuwenhuizen wrote: > Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> > > On Sat, Sep 10, 2016 at 12:40 AM, Marek Olšák <mar...@gmail.com> wrote: >> From: Marek Olšák <marek.ol...@amd.com> >> >> LLVM can CSE the loads, thus we can always re-load constants before each >> use. The decrease in SGPR spilling is huge. >> >> The best improvements are the dumbest ones. >> >> 26011 shaders in 14651 tests >> Totals: >> SGPRS: 1453346 -> 1251920 (-13.86 %) >> VGPRS: 742576 -> 728421 (-1.91 %) >> Spilled SGPRs: 52298 -> 16644 (-68.17 %) >> Spilled VGPRs: 397 -> 369 (-7.05 %) >> Scratch VGPRs: 1372 -> 1344 (-2.04 %) dwords per thread >> Code Size: 36136488 -> 36001064 (-0.37 %) bytes >> LDS: 767 -> 767 (0.00 %) blocks >> Max Waves: 219315 -> 222221 (1.33 %) >> --- >> src/gallium/drivers/radeonsi/si_shader.c | 30 +++++++++++------------------- >> 1 file changed, 11 insertions(+), 19 deletions(-) >> >> diff --git a/src/gallium/drivers/radeonsi/si_shader.c >> b/src/gallium/drivers/radeonsi/si_shader.c >> index 0b7de18..08e3cee 100644 >> --- a/src/gallium/drivers/radeonsi/si_shader.c >> +++ b/src/gallium/drivers/radeonsi/si_shader.c >> @@ -1874,26 +1874,33 @@ static LLVMValueRef fetch_constant( >> for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) >> values[chan] = fetch_constant(bld_base, reg, type, >> chan); >> >> return lp_build_gather_values(bld_base->base.gallivm, >> values, 4); >> } >> >> buf = reg->Register.Dimension ? 
reg->Dimension.Index : 0; >> idx = reg->Register.Index * 4 + swizzle; >> >> if (!reg->Register.Indirect && !reg->Dimension.Indirect) { >> + LLVMValueRef c0, c1; >> + >> + c0 = buffer_load_const(ctx, ctx->const_buffers[buf], >> + LLVMConstInt(ctx->i32, idx * 4, 0)); >> + >> if (!tgsi_type_is_64bit(type)) >> - return bitcast(bld_base, type, >> ctx->constants[buf][idx]); >> + return bitcast(bld_base, type, c0); >> else { >> + c1 = buffer_load_const(ctx, ctx->const_buffers[buf], >> + LLVMConstInt(ctx->i32, >> + (idx + 1) * 4, >> 0)); >> return radeon_llvm_emit_fetch_64bit(bld_base, type, >> - >> ctx->constants[buf][idx], >> - >> ctx->constants[buf][idx + 1]); >> + c0, c1); >> } >> } >> >> if (reg->Register.Dimension && reg->Dimension.Indirect) { >> LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, >> SI_PARAM_CONST_BUFFERS); >> LLVMValueRef index; >> index = get_bounded_indirect_index(ctx, &reg->DimIndirect, >> reg->Dimension.Index, >> SI_NUM_CONST_BUFFERS); >> bufp = build_indexed_load_const(ctx, ptr, index); >> @@ -5789,39 +5796,26 @@ static void create_function(struct si_shader_context >> *ctx) >> >> static void preload_constants(struct si_shader_context *ctx) >> { >> struct lp_build_tgsi_context *bld_base = >> &ctx->radeon_bld.soa.bld_base; >> struct gallivm_state *gallivm = bld_base->base.gallivm; >> const struct tgsi_shader_info *info = bld_base->info; >> unsigned buf; >> LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, >> SI_PARAM_CONST_BUFFERS); >> >> for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) { >> - unsigned i, num_const = info->const_file_max[buf] + 1; >> - >> - if (num_const == 0) >> + if (info->const_file_max[buf] == -1) >> continue; >> >> - /* Allocate space for the constant values */ >> - ctx->constants[buf] = CALLOC(num_const * 4, >> sizeof(LLVMValueRef)); >> - >> /* Load the resource descriptor */ >> ctx->const_buffers[buf] = >> build_indexed_load_const(ctx, ptr, >> lp_build_const_int32(gallivm, buf)); >> - >> - /* Load the constants, 
we rely on the code sinking to do the >> rest */ >> - for (i = 0; i < num_const * 4; ++i) { >> - ctx->constants[buf][i] = >> - buffer_load_const(ctx, >> - ctx->const_buffers[buf], >> - lp_build_const_int32(gallivm, i * >> 4)); >> - } >> } >> } >> >> static void preload_shader_buffers(struct si_shader_context *ctx) >> { >> struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; >> LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, >> SI_PARAM_SHADER_BUFFERS); >> int buf, maxbuf; >> >> maxbuf = MIN2(ctx->shader->selector->info.file_max[TGSI_FILE_BUFFER], >> @@ -6898,22 +6892,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, >> ctx.shader = shader->gs_copy_shader; >> if ((r = si_generate_gs_copy_shader(sscreen, &ctx, >> shader, debug))) { >> free(shader->gs_copy_shader); >> shader->gs_copy_shader = NULL; >> goto out; >> } >> } >> >> out: >> - for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++) >> - FREE(ctx.constants[i]); >> return r; >> } >> >> /** >> * Create, compile and return a shader part (prolog or epilog). >> * >> * \param sscreen screen >> * \param list list of shader parts of the same category >> * \param key shader part key >> * \param tm LLVM target machine >> -- >> 2.7.4 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev