From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 54 +++++++++++------------ src/gallium/drivers/radeonsi/si_shader_internal.h | 4 ++ 2 files changed, 31 insertions(+), 27 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8ae742c93f6..08b071e810b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -249,23 +249,23 @@ static LLVMValueRef unpack_llvm_param(struct si_shader_context *ctx, if (rshift + bitwidth < 32) { unsigned mask = (1 << bitwidth) - 1; value = LLVMBuildAnd(ctx->ac.builder, value, LLVMConstInt(ctx->i32, mask, 0), ""); } return value; } -static LLVMValueRef unpack_param(struct si_shader_context *ctx, - unsigned param, unsigned rshift, - unsigned bitwidth) +LLVMValueRef si_unpack_param(struct si_shader_context *ctx, + unsigned param, unsigned rshift, + unsigned bitwidth) { LLVMValueRef value = LLVMGetParam(ctx->main_fn, param); return unpack_llvm_param(ctx, value, rshift, bitwidth); } static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx) { switch (ctx->type) { case PIPE_SHADER_TESS_CTRL: @@ -298,21 +298,21 @@ static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx) * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) * - ... * * All three shaders VS(LS), TCS, TES share the same LDS space. */ static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx) { - return unpack_param(ctx, ctx->param_vs_state_bits, 8, 13); + return si_unpack_param(ctx, ctx->param_vs_state_bits, 8, 13); } static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx) { assert(ctx->type == PIPE_SHADER_TESS_CTRL); if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4; return util_last_bit64(ctx->shader->selector->outputs_written) * 4; @@ -321,46 +321,46 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context * static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx) { unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx); return LLVMConstInt(ctx->i32, stride, 0); } static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx) { if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) - return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 13); + return si_unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 13); const struct tgsi_shader_info *info = &ctx->shader->selector->info; unsigned tcs_out_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx); unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written); unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride + num_patch_outputs * 4; return LLVMConstInt(ctx->i32, patch_dw_stride, 0); } static LLVMValueRef get_tcs_out_patch0_offset(struct si_shader_context *ctx) { return lp_build_mul_imm(&ctx->bld_base.uint_bld, - unpack_param(ctx, + si_unpack_param(ctx, ctx->param_tcs_out_lds_offsets, 0, 16), 4); } static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx) { return lp_build_mul_imm(&ctx->bld_base.uint_bld, - unpack_param(ctx, + si_unpack_param(ctx, ctx->param_tcs_out_lds_offsets, 16, 16), 4); } static LLVMValueRef get_tcs_in_current_patch_offset(struct si_shader_context *ctx) { LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx); LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); @@ -398,39 +398,39 @@ get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx) static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx) { unsigned tcs_out_vertices = ctx->shader->selector ? ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0; /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */ if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices) return LLVMConstInt(ctx->i32, tcs_out_vertices, 0); - return unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6); + return si_unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6); } static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx) { unsigned stride; switch (ctx->type) { case PIPE_SHADER_VERTEX: stride = util_last_bit64(ctx->shader->selector->outputs_written); return LLVMConstInt(ctx->i32, stride * 4, 0); case PIPE_SHADER_TESS_CTRL: if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) { stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written); return LLVMConstInt(ctx->i32, stride * 4, 0); } - return unpack_param(ctx, ctx->param_vs_state_bits, 24, 8); + return si_unpack_param(ctx, ctx->param_vs_state_bits, 24, 8); default: assert(0); return NULL; } } static LLVMValueRef get_instance_index_for_fetch( struct si_shader_context *ctx, unsigned param_start_instance, LLVMValueRef divisor) @@ -959,21 +959,21 @@ static LLVMValueRef get_dw_address(struct si_shader_context *ctx, */ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx, LLVMValueRef rel_patch_id, LLVMValueRef vertex_index, LLVMValueRef param_index) { LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices; LLVMValueRef param_stride, constant16; vertices_per_patch = get_num_tcs_out_vertices(ctx); - num_patches = unpack_param(ctx, ctx->param_tcs_offchip_layout, 0, 6); + num_patches = si_unpack_param(ctx, ctx->param_tcs_offchip_layout, 0, 6); total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, num_patches, ""); constant16 = LLVMConstInt(ctx->i32, 16, 0); if (vertex_index) { base_addr = LLVMBuildMul(ctx->ac.builder, rel_patch_id, vertices_per_patch, ""); base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, vertex_index, ""); @@ -985,21 +985,21 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx, } base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, LLVMBuildMul(ctx->ac.builder, param_index, param_stride, ""), ""); base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, ""); if (!vertex_index) { LLVMValueRef patch_data_offset = - unpack_param(ctx, ctx->param_tcs_offchip_layout, 12, 20); + si_unpack_param(ctx, ctx->param_tcs_offchip_layout, 12, 20); base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, patch_data_offset, ""); } return base_addr; } /* This is a generic helper that can be shared by the NIR and TGSI backends */ static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices( struct si_shader_context *ctx, @@ -1620,29 +1620,29 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, LLVMValueRef value; param = si_shader_io_get_unique_index(semantic_name, semantic_index); /* GFX9 has the ESGS ring in LDS. */ if (ctx->screen->info.chip_class >= GFX9) { unsigned index = vtx_offset_param; switch (index / 2) { case 0: - vtx_offset = unpack_param(ctx, ctx->param_gs_vtx01_offset, + vtx_offset = si_unpack_param(ctx, ctx->param_gs_vtx01_offset, index % 2 ? 16 : 0, 16); break; case 1: - vtx_offset = unpack_param(ctx, ctx->param_gs_vtx23_offset, + vtx_offset = si_unpack_param(ctx, ctx->param_gs_vtx23_offset, index % 2 ? 16 : 0, 16); break; case 2: - vtx_offset = unpack_param(ctx, ctx->param_gs_vtx45_offset, + vtx_offset = si_unpack_param(ctx, ctx->param_gs_vtx45_offset, index % 2 ? 16 : 0, 16); break; default: assert(0); return NULL; } vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset, LLVMConstInt(ctx->i32, param * 4, 0), ""); return lds_load(bld_base, type, swizzle, vtx_offset); @@ -1921,21 +1921,21 @@ static void declare_input_fs( struct si_shader_context *ctx, unsigned input_index, const struct tgsi_full_declaration *decl, LLVMValueRef out[4]) { si_llvm_load_input_fs(ctx, input_index, out); } static LLVMValueRef get_sample_id(struct si_shader_context *ctx) { - return unpack_param(ctx, SI_PARAM_ANCILLARY, 8, 4); + return si_unpack_param(ctx, SI_PARAM_ANCILLARY, 8, 4); } static LLVMValueRef get_base_vertex(struct ac_shader_abi *abi) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); /* For non-indexed draws, the base vertex set by the driver * (for direct draws) or the CP (for indirect draws) is the * first vertex ID, but GLSL expects 0 to be returned. */ @@ -2070,21 +2070,21 @@ static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, } return load_tess_level(ctx, semantic_name); } static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); if (ctx->type == PIPE_SHADER_TESS_CTRL) - return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 6); + return si_unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 6); else if (ctx->type == PIPE_SHADER_TESS_EVAL) return get_num_tcs_out_vertices(ctx); else unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN"); } void si_load_system_value(struct si_shader_context *ctx, unsigned index, const struct tgsi_full_declaration *decl) { @@ -2774,21 +2774,21 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx, unsigned noutput, unsigned stream) { struct si_shader_selector *sel = ctx->shader->selector; struct pipe_stream_output_info *so = &sel->so; LLVMBuilderRef builder = ctx->ac.builder; int i; struct lp_build_if_state if_ctx; /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ LLVMValueRef so_vtx_count = - unpack_param(ctx, ctx->param_streamout_config, 16, 7); + si_unpack_param(ctx, ctx->param_streamout_config, 16, 7); LLVMValueRef tid = ac_get_thread_id(&ctx->ac); /* can_emit = tid < so_vtx_count; */ LLVMValueRef can_emit = LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, ""); /* Emit the streamout code conditionally. This actually avoids * out-of-bounds buffer access. The hw tells us via the SGPR * (so_vtx_count) which threads are allowed to emit streamout data. */ @@ -3550,21 +3550,21 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, struct tgsi_shader_info *info = &es->selector->info; LLVMValueRef soffset = LLVMGetParam(ctx->main_fn, ctx->param_es2gs_offset); LLVMValueRef lds_base = NULL; unsigned chan; int i; if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) { unsigned itemsize_dw = es->selector->esgs_itemsize / 4; LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); - LLVMValueRef wave_idx = unpack_param(ctx, ctx->param_merged_wave_info, 24, 4); + LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->param_merged_wave_info, 24, 4); vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, LLVMBuildMul(ctx->ac.builder, wave_idx, LLVMConstInt(ctx->i32, 64, false), ""), ""); lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx, LLVMConstInt(ctx->i32, itemsize_dw, 0), ""); } for (i = 0; i < info->num_outputs; i++) { int param; @@ -3593,21 +3593,21 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, } } if (ctx->screen->info.chip_class >= GFX9) si_set_es_return_value_for_gs(ctx); } static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx) { if (ctx->screen->info.chip_class >= GFX9) - return unpack_param(ctx, ctx->param_merged_wave_info, 16, 8); + return si_unpack_param(ctx, ctx->param_merged_wave_info, 16, 8); else return LLVMGetParam(ctx->main_fn, ctx->param_gs_wave_id); } static void emit_gs_epilogue(struct si_shader_context *ctx) { ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, si_get_gs_wave_id(ctx)); if (ctx->screen->info.chip_class >= GFX9) @@ -5156,22 +5156,22 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, LLVMValueRef param_rw_buffers, unsigned param_pos_fixed_pt) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef slot, desc, offset, row, bit, address[2]; /* Use the fixed-point gl_FragCoord input. * Since the stipple pattern is 32x32 and it repeats, just get 5 bits * per coordinate to get the repeating effect. */ - address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5); - address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5); + address[0] = si_unpack_param(ctx, param_pos_fixed_pt, 0, 5); + address[1] = si_unpack_param(ctx, param_pos_fixed_pt, 16, 5); /* Load the buffer descriptor. */ slot = LLVMConstInt(ctx->i32, SI_PS_CONST_POLY_STIPPLE, 0); desc = ac_build_load_to_sgpr(&ctx->ac, param_rw_buffers, slot); /* The stipple pattern is 32x32, each row has 32 bits. */ offset = LLVMBuildMul(builder, address[1], LLVMConstInt(ctx->i32, 4, 0), ""); row = buffer_load_const(ctx, desc, offset); row = ac_to_integer(&ctx->ac, row); @@ -5734,21 +5734,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, create_function(&ctx); preload_ring_buffers(&ctx); LLVMValueRef voffset = lp_build_mul_imm(uint, ctx.abi.vertex_id, 4); /* Fetch the vertex stream ID.*/ LLVMValueRef stream_id; if (gs_selector->so.num_outputs) - stream_id = unpack_param(&ctx, ctx.param_streamout_config, 24, 2); + stream_id = si_unpack_param(&ctx, ctx.param_streamout_config, 24, 2); else stream_id = ctx.i32_0; /* Fill in output information. */ for (i = 0; i < gsinfo->num_outputs; ++i) { outputs[i].semantic_name = gsinfo->output_semantic_name[i]; outputs[i].semantic_index = gsinfo->output_semantic_index[i]; for (int chan = 0; chan < 4; chan++) { outputs[i].vertex_stream[chan] = @@ -6114,21 +6114,21 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, } else if (ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_GEOMETRY) { if (!is_monolithic) ac_init_exec_full_mask(&ctx->ac); /* The barrier must execute for all shaders in a * threadgroup. */ si_llvm_emit_barrier(NULL, bld_base, NULL); - LLVMValueRef num_threads = unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); + LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); LLVMValueRef ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num_threads, ""); lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena); } } if (ctx->type == PIPE_SHADER_TESS_CTRL && sel->tcs_info.tessfactors_are_def_in_all_invocs) { for (unsigned i = 0; i < 6; i++) { @@ -6436,22 +6436,22 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, const unsigned gfx9_vtx_params[3] = { num_sgprs, num_sgprs + 1, num_sgprs + 4, }; LLVMValueRef vtx_in[6], vtx_out[6]; LLVMValueRef prim_id, rotate; if (ctx->screen->info.chip_class >= GFX9) { for (unsigned i = 0; i < 3; i++) { - vtx_in[i*2] = unpack_param(ctx, gfx9_vtx_params[i], 0, 16); - vtx_in[i*2+1] = unpack_param(ctx, gfx9_vtx_params[i], 16, 16); + vtx_in[i*2] = si_unpack_param(ctx, gfx9_vtx_params[i], 0, 16); + vtx_in[i*2+1] = si_unpack_param(ctx, gfx9_vtx_params[i], 16, 16); } } else { for (unsigned i = 0; i < 6; i++) vtx_in[i] = LLVMGetParam(func, gfx6_vtx_params[i]); } prim_id = LLVMGetParam(func, num_sgprs + 2); rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, ""); for (unsigned i = 0; i < 6; ++i) { @@ -7221,21 +7221,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, si_init_exec_from_input(ctx, 3, 0); if (key->vs_prolog.as_ls && ctx->screen->has_ls_vgpr_init_bug) { /* If there are no HS threads, SPI loads the LS VGPRs * starting at VGPR 0. Shift them back to where they * belong. */ LLVMValueRef has_hs_threads = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, - unpack_param(ctx, 3, 8, 8), + si_unpack_param(ctx, 3, 8, 8), ctx->i32_0, ""); for (i = 4; i > 0; --i) { input_vgprs[i + 1] = LLVMBuildSelect(ctx->ac.builder, has_hs_threads, input_vgprs[i + 1], input_vgprs[i - 1], ""); } } } @@ -7733,21 +7733,21 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx, 0x5555, 0x1111, 0x0101, 0x0001, }; assert(key->ps_prolog.states.samplemask_log_ps_iter < ARRAY_SIZE(ps_iter_masks)); uint32_t ps_iter_mask = ps_iter_masks[key->ps_prolog.states.samplemask_log_ps_iter]; unsigned ancillary_vgpr = key->ps_prolog.num_input_sgprs + key->ps_prolog.ancillary_vgpr_index; - LLVMValueRef sampleid = unpack_param(ctx, ancillary_vgpr, 8, 4); + LLVMValueRef sampleid = si_unpack_param(ctx, ancillary_vgpr, 8, 4); LLVMValueRef samplemask = LLVMGetParam(func, ancillary_vgpr + 1); samplemask = ac_to_integer(&ctx->ac, samplemask); samplemask = LLVMBuildAnd( ctx->ac.builder, samplemask, LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->i32, ps_iter_mask, false), sampleid, ""), ""); diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 1730a1fef19..1bd52722413 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -322,11 +322,15 @@ void si_llvm_load_input_vs( struct si_shader_context *ctx, unsigned input_index, LLVMValueRef out[4]); void si_llvm_load_input_fs( struct si_shader_context *ctx, unsigned input_index, LLVMValueRef out[4]); bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir); +LLVMValueRef si_unpack_param(struct si_shader_context *ctx, + unsigned param, unsigned rshift, + unsigned bitwidth); + #endif -- 2.15.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev