From: Marek Olšák <marek.ol...@amd.com> 26011 shaders in 14651 tests Totals: SGPRS: 1251920 -> 1152636 (-7.93 %) VGPRS: 728421 -> 728198 (-0.03 %) Spilled SGPRs: 16644 -> 3776 (-77.31 %) Spilled VGPRs: 369 -> 369 (0.00 %) Scratch VGPRs: 1344 -> 1344 (0.00 %) dwords per thread Code Size: 36001064 -> 35835152 (-0.46 %) bytes LDS: 767 -> 767 (0.00 %) blocks Max Waves: 222221 -> 222372 (0.07 %) Wait states: 0 -> 0 (0.00 %) --- src/gallium/drivers/radeonsi/si_shader.c | 123 +++++++------------------------ 1 file changed, 28 insertions(+), 95 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3f77714..c96c52e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -100,25 +100,20 @@ struct si_shader_context LLVMTargetMachineRef tm; unsigned invariant_load_md_kind; unsigned range_md_kind; unsigned uniform_md_kind; LLVMValueRef empty_md; /* Preloaded descriptors. */ LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS]; - LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS]; - LLVMValueRef sampler_views[SI_NUM_SAMPLERS]; - LLVMValueRef sampler_states[SI_NUM_SAMPLERS]; - LLVMValueRef fmasks[SI_NUM_SAMPLERS]; - LLVMValueRef images[SI_NUM_IMAGES]; LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; LLVMValueRef lds; LLVMValueRef gs_next_vertex[4]; LLVMValueRef return_value; LLVMTypeRef voidt; LLVMTypeRef i1; LLVMTypeRef i8; @@ -3420,30 +3415,32 @@ static void membar_emit( struct si_shader_context *ctx = si_shader_context(bld_base); emit_waitcnt(ctx); } static LLVMValueRef shader_buffer_fetch_rsrc(struct si_shader_context *ctx, const struct tgsi_full_src_register *reg) { LLVMValueRef ind_index; - LLVMValueRef rsrc_ptr; + LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_SHADER_BUFFERS); - if (!reg->Register.Indirect) - return ctx->shader_buffers[reg->Register.Index]; + if (!reg->Register.Indirect) { + ind_index = LLVMConstInt(ctx->i32, reg->Register.Index, 0); + return build_indexed_load_const(ctx, rsrc_ptr, ind_index); + } ind_index = get_bounded_indirect_index(ctx, &reg->Indirect, reg->Register.Index, SI_NUM_SHADER_BUFFERS); - rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS); return build_indexed_load_const(ctx, rsrc_ptr, ind_index); } static bool tgsi_is_array_sampler(unsigned target) { return target == TGSI_TEXTURE_1D_ARRAY || target == TGSI_TEXTURE_SHADOW1D_ARRAY || target == TGSI_TEXTURE_2D_ARRAY || target == TGSI_TEXTURE_SHADOW2D_ARRAY || target == 
TGSI_TEXTURE_CUBE_ARRAY || @@ -3493,46 +3490,54 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, * Load the resource descriptor for \p image. */ static void image_fetch_rsrc( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *image, bool dcc_off, LLVMValueRef *rsrc) { struct si_shader_context *ctx = si_shader_context(bld_base); + LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_IMAGES); assert(image->Register.File == TGSI_FILE_IMAGE); if (!image->Register.Indirect) { - /* Fast path: use preloaded resources */ - *rsrc = ctx->images[image->Register.Index]; + struct tgsi_shader_info *info = &ctx->shader->selector->info; + int i = image->Register.Index; + LLVMValueRef index = LLVMConstInt(ctx->i32, i, 0); + + /* Rely on LLVM to shrink the load for buffer resources. */ + *rsrc = build_indexed_load_const(ctx, rsrc_ptr, index); + + if (info->images_writemask & (1 << i) && + !(info->images_buffers & (1 << i))) + *rsrc = force_dcc_off(ctx, *rsrc); } else { /* Indexing and manual load */ LLVMValueRef ind_index; - LLVMValueRef rsrc_ptr; LLVMValueRef tmp; /* From the GL_ARB_shader_image_load_store extension spec: * * If a shader performs an image load, store, or atomic * operation using an image variable declared as an array, * and if the index used to select an individual element is * negative or greater than or equal to the size of the * array, the results of the operation are undefined but may * not lead to termination. 
*/ ind_index = get_bounded_indirect_index(ctx, &image->Indirect, image->Register.Index, SI_NUM_IMAGES); - rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES); tmp = build_indexed_load_const(ctx, rsrc_ptr, ind_index); if (dcc_off) tmp = force_dcc_off(ctx, tmp); *rsrc = tmp; } } static LLVMValueRef image_fetch_coords( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_instruction *inst, @@ -4405,25 +4410,31 @@ static void tex_fetch_ptrs( *fmask_ptr = load_sampler_desc(ctx, ind_index, DESC_FMASK); } else { if (samp_ptr) { *samp_ptr = load_sampler_desc(ctx, ind_index, DESC_SAMPLER); *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); } if (fmask_ptr) *fmask_ptr = NULL; } } else { - *res_ptr = ctx->sampler_views[sampler_index]; - if (samp_ptr) - *samp_ptr = ctx->sampler_states[sampler_index]; + LLVMValueRef index = LLVMConstInt(ctx->i32, sampler_index, 0); + + *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE); + + if (samp_ptr) { + *samp_ptr = load_sampler_desc(ctx, index, DESC_SAMPLER); + *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, + *samp_ptr); + } if (fmask_ptr) - *fmask_ptr = ctx->fmasks[sampler_index]; + *fmask_ptr = load_sampler_desc(ctx, index, DESC_FMASK); } } static void txq_fetch_args( struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; @@ -5876,95 +5887,20 @@ static void preload_constant_buffers(struct si_shader_context *ctx) for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) { if (info->const_file_max[buf] == -1) continue; /* Load the resource descriptor */ ctx->const_buffers[buf] = build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf)); } } -static void preload_shader_buffers(struct si_shader_context *ctx) -{ - struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; - LLVMValueRef ptr = 
LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS); - int buf, maxbuf; - - maxbuf = MIN2(ctx->shader->selector->info.file_max[TGSI_FILE_BUFFER], - SI_NUM_SHADER_BUFFERS - 1); - for (buf = 0; buf <= maxbuf; ++buf) { - ctx->shader_buffers[buf] = - build_indexed_load_const( - ctx, ptr, lp_build_const_int32(gallivm, buf)); - } -} - -static void preload_samplers(struct si_shader_context *ctx) -{ - struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; - struct gallivm_state *gallivm = bld_base->base.gallivm; - const struct tgsi_shader_info *info = bld_base->info; - unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1; - LLVMValueRef offset; - - if (num_samplers == 0) - return; - - /* Load the resources and samplers, we rely on the code sinking to do the rest */ - for (i = 0; i < num_samplers; ++i) { - /* Resource */ - offset = lp_build_const_int32(gallivm, i); - ctx->sampler_views[i] = - load_sampler_desc(ctx, offset, DESC_IMAGE); - - /* FMASK resource */ - if (info->is_msaa_sampler[i]) - ctx->fmasks[i] = - load_sampler_desc(ctx, offset, DESC_FMASK); - else { - ctx->sampler_states[i] = - load_sampler_desc(ctx, offset, DESC_SAMPLER); - ctx->sampler_states[i] = - sici_fix_sampler_aniso(ctx, ctx->sampler_views[i], - ctx->sampler_states[i]); - } - } -} - -static void preload_images(struct si_shader_context *ctx) -{ - struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; - struct tgsi_shader_info *info = &ctx->shader->selector->info; - struct gallivm_state *gallivm = bld_base->base.gallivm; - unsigned num_images = bld_base->info->file_max[TGSI_FILE_IMAGE] + 1; - LLVMValueRef res_ptr; - unsigned i; - - if (num_images == 0) - return; - - res_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES); - - for (i = 0; i < num_images; ++i) { - /* Rely on LLVM to shrink the load for buffer resources. 
*/ - LLVMValueRef rsrc = - build_indexed_load_const(ctx, res_ptr, - lp_build_const_int32(gallivm, i)); - - if (info->images_writemask & (1 << i) && - !(info->images_buffers & (1 << i))) - rsrc = force_dcc_off(ctx, rsrc); - - ctx->images[i] = rsrc; - } -} - /** * Load ESGS and GSVS ring buffer resource descriptors and save the variables * for later use. */ static void preload_ring_buffers(struct si_shader_context *ctx) { struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm; LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, @@ -6793,23 +6729,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, ctx.radeon_bld.declare_memory_region = declare_compute_memory; break; default: assert(!"Unsupported shader type"); return -1; } create_meta_data(&ctx); create_function(&ctx); preload_constant_buffers(&ctx); - preload_shader_buffers(&ctx); - preload_samplers(&ctx); - preload_images(&ctx); preload_ring_buffers(&ctx); if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT && shader->key.ps.prolog.poly_stipple) { LLVMValueRef list = LLVMGetParam(ctx.radeon_bld.main_fn, SI_PARAM_RW_BUFFERS); si_llvm_emit_polygon_stipple(&ctx, list, SI_PARAM_POS_FIXED_PT); } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev