The driver should ignore the NIR option on SI with LLVM 5.0 or older (HAVE_LLVM <= 0x0500). Marek
On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri <tarc...@itsqueeze.com> wrote: > This will be shared by the TGSI and NIR backends. For simplicity > we leave the SI LLVM 5.0 and lower work around only in the TGSI > backend. > --- > src/gallium/drivers/radeonsi/si_shader.c | 88 > ++++++++++++++++++-------------- > 1 file changed, 49 insertions(+), 39 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index d5607a99d32..62cb7ea7eb5 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -2322,6 +2322,49 @@ void si_tgsi_declare_compute_memory(struct > si_shader_context *ctx, > si_declare_compute_memory(ctx); > } > > +static LLVMValueRef load_const_buffer_desc_fast_path(struct > si_shader_context *ctx) > +{ > + LLVMValueRef ptr = > + LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_ > buffers); > + struct si_shader_selector *sel = ctx->shader->selector; > + > + /* Do the bounds checking with a descriptor, because > + * doing computation and manual bounds checking of 64-bit > + * addresses generates horrible VALU code with very high > + * VGPR usage and very low SIMD occupancy. > + */ > + ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); > + > + LLVMValueRef desc0, desc1; > + if (HAVE_32BIT_POINTERS) { > + desc0 = ptr; > + desc1 = LLVMConstInt(ctx->i32, > + > S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), > 0); > + } else { > + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, > ""); > + desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, > ctx->i32_0, ""); > + desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, > ctx->i32_1, ""); > + /* Mask out all bits except BASE_ADDRESS_HI. 
*/ > + desc1 = LLVMBuildAnd(ctx->ac.builder, desc1, > + LLVMConstInt(ctx->i32, > ~C_008F04_BASE_ADDRESS_HI, 0), ""); > + } > + > + LLVMValueRef desc_elems[] = { > + desc0, > + desc1, > + LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * > 16, 0), > + LLVMConstInt(ctx->i32, > + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | > + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | > + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | > + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | > + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) > | > + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), > 0) > + }; > + > + return ac_build_gather_values(&ctx->ac, desc_elems, 4); > +} > + > static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, > int i) > { > LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn, > @@ -2400,8 +2443,6 @@ static LLVMValueRef fetch_constant( > /* Fast path when user data SGPRs point to constant buffer 0 > directly. */ > if (sel->info.const_buffers_declared == 1 && > sel->info.shader_buffers_declared == 0) { > - LLVMValueRef ptr = > - LLVMGetParam(ctx->main_fn, > ctx->param_const_and_shader_buffers); > > /* This enables use of s_load_dword and flat_load_dword > for const buffer 0 > * loads, and up to x4 load opcode merging. However, it > leads to horrible > @@ -2416,48 +2457,17 @@ static LLVMValueRef fetch_constant( > * s_buffer_load_dword (that we have to prevent) is when > we use use > * a literal offset where we don't need bounds checking. 
> */ > - if (ctx->screen->info.chip_class == SI && > - HAVE_LLVM < 0x0600 && > - !reg->Register.Indirect) { > + if (ctx->screen->info.chip_class == SI && HAVE_LLVM < > 0x0600 && > + !reg->Register.Indirect) { > + LLVMValueRef ptr = > + LLVMGetParam(ctx->main_fn, > ctx->param_const_and_shader_buffers); > + > addr = LLVMBuildLShr(ctx->ac.builder, addr, > LLVMConstInt(ctx->i32, 2, 0), ""); > LLVMValueRef result = > ac_build_load_invariant(&ctx->ac, ptr, addr); > return bitcast(bld_base, type, result); > } > > - /* Do the bounds checking with a descriptor, because > - * doing computation and manual bounds checking of 64-bit > - * addresses generates horrible VALU code with very high > - * VGPR usage and very low SIMD occupancy. > - */ > - ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, > ctx->ac.intptr, ""); > - > - LLVMValueRef desc0, desc1; > - if (HAVE_32BIT_POINTERS) { > - desc0 = ptr; > - desc1 = LLVMConstInt(ctx->i32, > - > S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), > 0); > - } else { > - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, > ctx->v2i32, ""); > - desc0 = LLVMBuildExtractElement(ctx->ac.builder, > ptr, ctx->i32_0, ""); > - desc1 = LLVMBuildExtractElement(ctx->ac.builder, > ptr, ctx->i32_1, ""); > - /* Mask out all bits except BASE_ADDRESS_HI. 
*/ > - desc1 = LLVMBuildAnd(ctx->ac.builder, desc1, > - LLVMConstInt(ctx->i32, > ~C_008F04_BASE_ADDRESS_HI, 0), ""); > - } > - > - LLVMValueRef desc_elems[] = { > - desc0, > - desc1, > - LLVMConstInt(ctx->i32, > (sel->info.const_file_max[0] + 1) * 16, 0), > - LLVMConstInt(ctx->i32, > - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | > - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | > - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | > - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | > - > S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) > | > - > S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), > 0) > - }; > - LLVMValueRef desc = ac_build_gather_values(&ctx->ac, > desc_elems, 4); > + LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx); > LLVMValueRef result = buffer_load_const(ctx, desc, addr); > return bitcast(bld_base, type, result); > } > -- > 2.14.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev