From: Marek Olšák <marek.ol...@amd.com> SGPRS: 2170102 -> 2158430 (-0.54 %) VGPRS: 1645656 -> 1641516 (-0.25 %) Spilled SGPRs: 9078 -> 8810 (-2.95 %) Spilled VGPRs: 130 -> 114 (-12.31 %) Scratch size: 1508 -> 1492 (-1.06 %) dwords per thread Code Size: 52094872 -> 52692540 (1.15 %) bytes Max Waves: 371848 -> 372723 (0.24 %)
v2: - the shader cache needs to take address32_hi into account - set amdgpu-32bit-address-high-bits Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> (v1) --- src/amd/common/ac_llvm_build.c | 13 +++ src/amd/common/ac_llvm_build.h | 5 + src/gallium/drivers/radeonsi/si_descriptors.c | 10 +- src/gallium/drivers/radeonsi/si_pipe.c | 16 ++- src/gallium/drivers/radeonsi/si_shader.c | 118 ++++++++++++++-------- src/gallium/drivers/radeonsi/si_shader.h | 23 ++++- src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 8 +- 7 files changed, 137 insertions(+), 56 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 3efcaa1..e1ec81f 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -57,20 +57,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context, ctx->context = context; ctx->module = NULL; ctx->builder = NULL; ctx->voidt = LLVMVoidTypeInContext(ctx->context); ctx->i1 = LLVMInt1TypeInContext(ctx->context); ctx->i8 = LLVMInt8TypeInContext(ctx->context); ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); + ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64; ctx->f16 = LLVMHalfTypeInContext(ctx->context); ctx->f32 = LLVMFloatTypeInContext(ctx->context); ctx->f64 = LLVMDoubleTypeInContext(ctx->context); ctx->v2i16 = LLVMVectorType(ctx->i16, 2); ctx->v2i32 = LLVMVectorType(ctx->i32, 2); ctx->v3i32 = LLVMVectorType(ctx->i32, 3); ctx->v4i32 = LLVMVectorType(ctx->i32, 4); ctx->v2f32 = LLVMVectorType(ctx->f32, 2); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); @@ -132,21 +133,24 @@ unsigned ac_get_type_size(LLVMTypeRef type) { LLVMTypeKind kind = LLVMGetTypeKind(type); switch (kind) { case LLVMIntegerTypeKind: return LLVMGetIntTypeWidth(type) / 8; case LLVMFloatTypeKind: return 4; case LLVMDoubleTypeKind: + return 8; case LLVMPointerTypeKind: + if (LLVMGetPointerAddressSpace(type) == AC_CONST_32BIT_ADDR_SPACE) + return 4; return 8; case LLVMVectorTypeKind: return LLVMGetVectorSize(type) * ac_get_type_size(LLVMGetElementType(type)); case LLVMArrayTypeKind: return LLVMGetArrayLength(type) * ac_get_type_size(LLVMGetElementType(type)); default: assert(0); return 0; @@ -1982,10 +1986,19 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMIntEQ, src0, ctx->i32_0, ""), LLVMConstInt(ctx->i32, -1, 0), lsb, ""); } LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) { return LLVMPointerType(LLVMArrayType(elem_type, 0), AC_CONST_ADDR_SPACE); } + +LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) +{ + if (!HAVE_32BIT_POINTERS) + return ac_array_in_const_addr_space(elem_type); + + return LLVMPointerType(LLVMArrayType(elem_type, 0), + AC_CONST_32BIT_ADDR_SPACE); +} diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 35f849a..116037a 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -27,36 +27,40 @@ #include <stdbool.h> #include <llvm-c/TargetMachine.h> #include "amd_family.h" #ifdef __cplusplus extern "C" { #endif +#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0600) + enum { AC_CONST_ADDR_SPACE = 2, /* CONST is the only address space that selects SMEM loads */ AC_LOCAL_ADDR_SPACE = 3, + AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type has 32 bits */ }; struct ac_llvm_context { LLVMContextRef context; LLVMModuleRef module; LLVMBuilderRef builder; LLVMTypeRef voidt; LLVMTypeRef i1; LLVMTypeRef i8; LLVMTypeRef i16; LLVMTypeRef i32; LLVMTypeRef i64; + LLVMTypeRef intptr; LLVMTypeRef f16; LLVMTypeRef f32; LLVMTypeRef f64; LLVMTypeRef v2i16; LLVMTypeRef v2i32; LLVMTypeRef v3i32; LLVMTypeRef v4i32; LLVMTypeRef v2f32; LLVMTypeRef v4f32; LLVMTypeRef v8i32; @@ -340,16 +344,17 @@ void ac_declare_lds_as_pointer(struct ac_llvm_context *ac); LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, LLVMValueRef dw_addr); void ac_lds_store(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value); LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0); LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type); +LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type); #ifdef __cplusplus } #endif #endif diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 393053c..5e7134a 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2057,31 +2057,35 @@ void si_shader_change_notify(struct si_context *sctx) } else { si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0); } } static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs, struct si_descriptors *desc, unsigned sh_base, unsigned pointer_count) { - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0)); + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (HAVE_32BIT_POINTERS ? 1 : 2), 0)); radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2); } static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs, struct si_descriptors *desc) { uint64_t va = desc->gpu_address; radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + + if (HAVE_32BIT_POINTERS) + assert(va <= 0xffffffff); + else + radeon_emit(cs, va >> 32); } static void si_emit_shader_pointer(struct si_context *sctx, struct si_descriptors *desc, unsigned sh_base) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; si_emit_shader_pointer_head(cs, desc, sh_base, 1); si_emit_shader_pointer_body(cs, desc); @@ -2614,22 +2618,24 @@ void si_all_resident_buffers_begin_new_cs(struct si_context *sctx) sctx->b.num_resident_handles += num_resident_tex_handles + num_resident_img_handles; } /* INIT/DEINIT/UPLOAD */ void si_init_all_descriptors(struct si_context *sctx) { int i; +#if !HAVE_32BIT_POINTERS STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0); STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0); +#endif for (i = 0; i < SI_NUM_SHADERS; i++) { bool gfx9_tcs = false; bool gfx9_gs = false; unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS; unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS; struct si_descriptors *desc; if (sctx->b.chip_class >= GFX9) { gfx9_tcs = i == PIPE_SHADER_TESS_CTRL; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 1a5d598..f863a2a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -631,25 +631,31 @@ static void si_disk_cache_create(struct si_screen *sscreen) uint32_t llvm_timestamp; if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp)) { res = asprintf(×tamp_str, "%u_%u", mesa_timestamp, llvm_timestamp); } if (res != -1) { /* These flags affect shader compilation. */ - uint64_t shader_debug_flags = - sscreen->debug_flags & - (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | - DBG(SI_SCHED) | - DBG(UNSAFE_MATH)); + #define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | \ + DBG(SI_SCHED) | \ + DBG(UNSAFE_MATH)) + uint64_t shader_debug_flags = sscreen->debug_flags & + ALL_FLAGS; + + /* Add the high bits of 32-bit addresses, which affects + * how 32-bit addresses are expanded to 64 bits. + */ + STATIC_ASSERT(ALL_FLAGS <= UINT_MAX); + shader_debug_flags |= (uint64_t)sscreen->info.address32_hi << 32; sscreen->disk_shader_cache = disk_cache_create(si_get_family_name(sscreen), timestamp_str, shader_debug_flags); free(timestamp_str); } } } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1294af3..0024db4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3196,26 +3196,32 @@ si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret, { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef p = LLVMGetParam(ctx->main_fn, param); return LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, p), return_index, ""); } static LLVMValueRef -si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret, - unsigned param, unsigned return_index) +si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret, + unsigned param, unsigned return_index) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef ptr, lo, hi; + if (HAVE_32BIT_POINTERS) { + ptr = LLVMGetParam(ctx->main_fn, param); + ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, ""); + return LLVMBuildInsertValue(builder, ret, ptr, return_index, ""); + } + ptr = LLVMGetParam(ctx->main_fn, param); ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, ""); ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, ""); lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, ""); hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, ""); ret = LLVMBuildInsertValue(builder, ret, lo, return_index, ""); return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, ""); } /* This only writes the tessellation factor levels. */ @@ -3317,75 +3323,76 @@ static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, /* Pass TCS inputs from LS to TCS on GFX9. */ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) { LLVMValueRef ret = ctx->return_value; ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, - 8 + SI_SGPR_RW_BUFFERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, - ctx->param_bindless_samplers_and_images, - 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); + ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers, + 8 + SI_SGPR_RW_BUFFERS); + ret = si_insert_input_ptr(ctx, ret, + ctx->param_bindless_samplers_and_images, + 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k, 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K); - unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2; - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, - 8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, - 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES); + unsigned desc_param = ctx->param_tcs_factor_addr_base64k + + (HAVE_32BIT_POINTERS ? 1 : 2); + ret = si_insert_input_ptr(ctx, ret, desc_param, + 8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS); + ret = si_insert_input_ptr(ctx, ret, desc_param + 1, + 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES); unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR; ret = LLVMBuildInsertValue(ctx->ac.builder, ret, ac_to_float(&ctx->ac, ctx->abi.tcs_patch_id), vgpr++, ""); ret = LLVMBuildInsertValue(ctx->ac.builder, ret, ac_to_float(&ctx->ac, ctx->abi.tcs_rel_ids), vgpr++, ""); ctx->return_value = ret; } /* Pass GS inputs from ES to GS on GFX9. */ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) { LLVMValueRef ret = ctx->return_value; ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, - 8 + SI_SGPR_RW_BUFFERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, - ctx->param_bindless_samplers_and_images, - 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); + ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers, + 8 + SI_SGPR_RW_BUFFERS); + ret = si_insert_input_ptr(ctx, ret, + ctx->param_bindless_samplers_and_images, + 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); unsigned desc_param = ctx->param_vs_state_bits + 1; - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, - 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, - 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES); + ret = si_insert_input_ptr(ctx, ret, desc_param, + 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); + ret = si_insert_input_ptr(ctx, ret, desc_param + 1, + 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES); unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR; for (unsigned i = 0; i < 5; i++) { unsigned param = ctx->param_gs_vtx01_offset + i; ret = si_insert_input_ret_float(ctx, ret, param, vgpr++); } ctx->return_value = ret; } static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, @@ -4387,20 +4394,23 @@ static void si_create_function(struct si_shader_context *ctx, lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_NOALIAS); ac_add_attr_dereferenceable(P, UINT64_MAX); } } for (i = 0; i < fninfo->num_params; ++i) { if (fninfo->assign[i]) *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i); } + si_llvm_add_attribute(ctx->main_fn, "amdgpu-32bit-address-high-bits", + ctx->screen->info.address32_hi); + if (max_workgroup_size) { si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size", max_workgroup_size); } LLVMAddTargetDependentFunctionAttr(ctx->main_fn, "no-signed-zeros-fp-math", "true"); if (ctx->screen->debug_flags & DBG(UNSAFE_MATH)) { /* These were copied from some LLVM test. */ @@ -4483,46 +4493,46 @@ static void declare_per_stage_desc_pointers(struct si_shader_context *ctx, LLVMTypeRef const_shader_buf_type; if (ctx->shader->selector->info.const_buffers_declared == 1 && ctx->shader->selector->info.shader_buffers_declared == 0) const_shader_buf_type = ctx->f32; else const_shader_buf_type = ctx->v4i32; unsigned const_and_shader_buffers = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(const_shader_buf_type)); + ac_array_in_const32_addr_space(const_shader_buf_type)); unsigned samplers_and_images = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(ctx->v8i32)); + ac_array_in_const32_addr_space(ctx->v8i32)); if (assign_params) { ctx->param_const_and_shader_buffers = const_and_shader_buffers; ctx->param_samplers_and_images = samplers_and_images; } } static void declare_global_desc_pointers(struct si_shader_context *ctx, struct si_function_info *fninfo) { ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(ctx->v4i32)); + ac_array_in_const32_addr_space(ctx->v4i32)); ctx->param_bindless_samplers_and_images = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(ctx->v8i32)); + ac_array_in_const32_addr_space(ctx->v8i32)); } static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx, struct si_function_info *fninfo) { ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR, - ac_array_in_const_addr_space(ctx->v4i32)); + ac_array_in_const32_addr_space(ctx->v4i32)); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id); ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32); } static void declare_vs_input_vgprs(struct si_shader_context *ctx, struct si_function_info *fninfo, unsigned *num_prolog_vgprs) { @@ -4676,21 +4686,22 @@ static void create_function(struct si_shader_context *ctx) declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_VERTEX); declare_vs_specific_input_sgprs(ctx, &fninfo); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + if (!HAVE_32BIT_POINTERS) + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_TESS_CTRL); /* VGPRs (first TCS, then VS) */ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id); add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_rel_ids); if (ctx->type == PIPE_SHADER_VERTEX) { declare_vs_input_vgprs(ctx, &fninfo, @@ -4732,21 +4743,22 @@ static void create_function(struct si_shader_context *ctx) ctx->type == PIPE_SHADER_TESS_EVAL)); if (ctx->type == PIPE_SHADER_VERTEX) { declare_vs_specific_input_sgprs(ctx, &fninfo); } else { /* TESS_EVAL (and also GEOMETRY): * Declare as many input SGPRs as the VS has. */ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + if (!HAVE_32BIT_POINTERS) + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ } declare_per_stage_desc_pointers(ctx, &fninfo, ctx->type == PIPE_SHADER_GEOMETRY); /* VGPRs (first GS, then VS/TES) */ ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.gs_prim_id); @@ -6465,20 +6477,25 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, num_out = 0; num_out_sgpr = 0; for (unsigned i = 0; i < fninfo.num_params; ++i) { LLVMValueRef param = LLVMGetParam(ctx->main_fn, i); LLVMTypeRef param_type = LLVMTypeOf(param); LLVMTypeRef out_type = i < fninfo.num_sgpr_params ? ctx->i32 : ctx->f32; unsigned size = ac_get_type_size(param_type) / 4; if (size == 1) { + if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) { + param = LLVMBuildPtrToInt(builder, param, ctx->i32, ""); + param_type = ctx->i32; + } + if (param_type != out_type) param = LLVMBuildBitCast(builder, param, out_type, ""); out[num_out++] = param; } else { LLVMTypeRef vector_type = LLVMVectorType(out_type, size); if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) { param = LLVMBuildPtrToInt(builder, param, ctx->i64, ""); param_type = ctx->i64; } @@ -6540,22 +6557,28 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out)); assert(is_sgpr || out_idx >= num_out_sgpr); if (param_size == 1) arg = out[out_idx]; else arg = lp_build_gather_values(&ctx->gallivm, &out[out_idx], param_size); if (LLVMTypeOf(arg) != param_type) { if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) { - arg = LLVMBuildBitCast(builder, arg, ctx->i64, ""); - arg = LLVMBuildIntToPtr(builder, arg, param_type, ""); + if (LLVMGetPointerAddressSpace(param_type) == + AC_CONST_32BIT_ADDR_SPACE) { + arg = LLVMBuildBitCast(builder, arg, ctx->i32, ""); + arg = LLVMBuildIntToPtr(builder, arg, param_type, ""); + } else { + arg = LLVMBuildBitCast(builder, arg, ctx->i64, ""); + arg = LLVMBuildIntToPtr(builder, arg, param_type, ""); + } } else { arg = LLVMBuildBitCast(builder, arg, param_type, ""); } } in[param_idx] = arg; out_idx += param_size; } ret = LLVMBuildCall(builder, parts[part], in, num_params, ""); @@ -7016,23 +7039,30 @@ out: static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx) { LLVMValueRef ptr[2], list; bool is_merged_shader = ctx->screen->info.chip_class >= GFX9 && (ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_GEOMETRY || ctx->shader->key.as_ls || ctx->shader->key.as_es); + if (HAVE_32BIT_POINTERS) { + ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); + list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], + ac_array_in_const32_addr_space(ctx->v4i32), ""); + return list; + } + /* Get the pointer to rw buffers. */ ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); - ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI); + ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS + 1); list = lp_build_gather_values(&ctx->gallivm, ptr, 2); list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, ""); list = LLVMBuildIntToPtr(ctx->ac.builder, list, ac_array_in_const_addr_space(ctx->v4i32), ""); return list; } /** * Build the vertex shader prolog function. * @@ -7231,39 +7261,39 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, si_init_function_info(&fninfo); if (ctx->screen->info.chip_class >= GFX9) { add_arg(&fninfo, ARG_SGPR, ctx->i64); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); } else { - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); - add_arg(&fninfo, ARG_SGPR, ctx->i64); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); } @@ -7651,24 +7681,24 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, { struct lp_build_tgsi_context *bld_base = &ctx->bld_base; struct si_function_info fninfo; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; int i; struct si_ps_exports exp = {}; si_init_function_info(&fninfo); /* Declare input SGPRs. */ - ctx->param_rw_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64); - ctx->param_bindless_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->i64); - ctx->param_const_and_shader_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64); - ctx->param_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->i64); + ctx->param_rw_buffers = add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + ctx->param_bindless_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + ctx->param_const_and_shader_buffers = add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); + ctx->param_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg_checked(&fninfo, ARG_SGPR, ctx->f32, SI_PARAM_ALPHA_REF); /* Declare input VGPRs. */ unsigned required_num_params = fninfo.num_sgpr_params + util_bitcount(key->ps_epilog.colors_written) * 4 + key->ps_epilog.writes_z + key->ps_epilog.writes_stencil + key->ps_epilog.writes_samplemask; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 3cc49ca..ef4472b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -129,46 +129,57 @@ #ifndef SI_SHADER_H #define SI_SHADER_H #include <llvm-c/Core.h> /* LLVMModuleRef */ #include <llvm-c/TargetMachine.h> #include "tgsi/tgsi_scan.h" #include "util/u_queue.h" #include "ac_binary.h" +#include "ac_llvm_build.h" #include "si_state.h" struct nir_shader; #define SI_MAX_VS_OUTPUTS 40 /* Shader IO unique indices are supported for TGSI_SEMANTIC_GENERIC with an * index smaller than this. */ #define SI_MAX_IO_GENERIC 46 /* SGPR user data indices */ enum { SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */ +#if !HAVE_32BIT_POINTERS SI_SGPR_RW_BUFFERS_HI, +#endif SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, +#if !HAVE_32BIT_POINTERS SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI, +#endif SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */ +#if !HAVE_32BIT_POINTERS SI_SGPR_CONST_AND_SHADER_BUFFERS_HI, +#endif SI_SGPR_SAMPLERS_AND_IMAGES, +#if !HAVE_32BIT_POINTERS SI_SGPR_SAMPLERS_AND_IMAGES_HI, +#endif SI_NUM_RESOURCE_SGPRS, /* all VS variants */ SI_SGPR_VERTEX_BUFFERS = SI_NUM_RESOURCE_SGPRS, +#if !HAVE_32BIT_POINTERS SI_SGPR_VERTEX_BUFFERS_HI, +#endif SI_SGPR_BASE_VERTEX, SI_SGPR_START_INSTANCE, SI_SGPR_DRAWID, SI_SGPR_VS_STATE_BITS, SI_VS_NUM_USER_SGPR, SI_SGPR_VS_BLIT_DATA = SI_SGPR_CONST_AND_SHADER_BUFFERS, /* TES */ SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, @@ -183,37 +194,47 @@ enum { GFX6_SGPR_TCS_OFFCHIP_ADDR_BASE64K, GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K, GFX6_TCS_NUM_USER_SGPR, /* GFX9: Merged LS-HS (VS-TCS) only. */ GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR, GFX9_SGPR_TCS_OUT_OFFSETS, GFX9_SGPR_TCS_OUT_LAYOUT, GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K, GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_unused_to_align_the_next_pointer, +#endif GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI, +#endif GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES_HI, +#endif GFX9_TCS_NUM_USER_SGPR, /* GFX9: Merged ES-GS (VS-GS or TES-GS). */ GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI, +#endif GFX9_SGPR_GS_SAMPLERS_AND_IMAGES, +#if !HAVE_32BIT_POINTERS GFX9_SGPR_GS_SAMPLERS_AND_IMAGES_HI, +#endif GFX9_GS_NUM_USER_SGPR, /* GS limits */ GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS, - SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS_HI + 1, + SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS + (HAVE_32BIT_POINTERS ? 1 : 2), /* PS only */ SI_SGPR_ALPHA_REF = SI_NUM_RESOURCE_SGPRS, SI_PS_NUM_USER_SGPR, }; /* LLVM function parameter indices */ enum { SI_NUM_RESOURCE_PARAMS = 4, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index ec1de40..4ef55f5 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -133,21 +133,21 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef rsrc; if (desc_type == AC_DESC_BUFFER) { index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); list = LLVMBuildPointerCast(builder, list, - ac_array_in_const_addr_space(ctx->v4i32), ""); + ac_array_in_const32_addr_space(ctx->v4i32), ""); } else { assert(desc_type == AC_DESC_IMAGE); } rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index); if (desc_type == AC_DESC_IMAGE && dcc_off) rsrc = force_dcc_off(ctx, rsrc); return rsrc; } @@ -1100,33 +1100,33 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, switch (type) { case AC_DESC_IMAGE: /* The image is at [0:7]. */ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); break; case AC_DESC_BUFFER: /* The buffer is in [4:7]. */ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); list = LLVMBuildPointerCast(builder, list, - ac_array_in_const_addr_space(ctx->v4i32), ""); + ac_array_in_const32_addr_space(ctx->v4i32), ""); break; case AC_DESC_FMASK: /* The FMASK is at [8:15]. */ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); break; case AC_DESC_SAMPLER: /* The sampler state is at [12:15]. */ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), ""); list = LLVMBuildPointerCast(builder, list, - ac_array_in_const_addr_space(ctx->v4i32), ""); + ac_array_in_const32_addr_space(ctx->v4i32), ""); break; } return ac_build_load_to_sgpr(&ctx->ac, list, index); } /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * * SI-CI: * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic @@ -1952,21 +1952,21 @@ static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action, LLVMValueRef resource, addr; /* Ignore src0, because KHR_blend_func_extended disallows multiple render * targets. */ /* Load the image descriptor. */ STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0); resource = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); resource = LLVMBuildPointerCast(ctx->ac.builder, resource, - ac_array_in_const_addr_space(ctx->v8i32), ""); + ac_array_in_const32_addr_space(ctx->v8i32), ""); resource = ac_build_load_to_sgpr(&ctx->ac, resource, LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); /* Get the current pixel address. */ LLVMValueRef pos_fixed = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_FIXED_PT); LLVMValueRef pos[4] = { LLVMBuildAnd(ctx->ac.builder, pos_fixed, LLVMConstInt(ctx->i32, 0xffff, 0), ""), LLVMBuildLShr(ctx->ac.builder, pos_fixed, LLVMConstInt(ctx->i32, 16, 0), ""), -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev