I think this is already covered by https://patchwork.freedesktop.org/series/55025/
? On Sat, Jan 12, 2019 at 12:53 AM Marek Olšák <mar...@gmail.com> wrote: > > From: Marek Olšák <marek.ol...@amd.com> > > --- > src/amd/common/ac_llvm_build.c | 18 +++++++++++++----- > .../drivers/radeonsi/si_shader_tgsi_mem.c | 4 ++-- > 2 files changed, 15 insertions(+), 7 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index 76047148a6a..c0d90ada2be 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -1234,25 +1234,33 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, > if (allow_smem && !glc && !slc) { > assert(vindex == NULL); > > LLVMValueRef result[8]; > > for (int i = 0; i < num_channels; i++) { > if (i) { > offset = LLVMBuildAdd(ctx->builder, offset, > LLVMConstInt(ctx->i32, > 4, 0), ""); > } > - LLVMValueRef args[2] = {rsrc, offset}; > - result[i] = ac_build_intrinsic(ctx, > "llvm.SI.load.const.v4i32", > - ctx->f32, args, 2, > - AC_FUNC_ATTR_READNONE | > - AC_FUNC_ATTR_LEGACY); > + > + if (HAVE_LLVM >= 0x0800) { > + LLVMValueRef args[3] = {rsrc, offset, > ctx->i32_0}; > + result[i] = ac_build_intrinsic(ctx, > "llvm.amdgcn.s.buffer.load.i32", > + ctx->f32, > args, 3, > + > AC_FUNC_ATTR_READNONE); > + } else { > + LLVMValueRef args[2] = {rsrc, offset}; > + result[i] = ac_build_intrinsic(ctx, > "llvm.SI.load.const.v4i32", > + ctx->f32, > args, 2, > + > AC_FUNC_ATTR_READNONE | > + > AC_FUNC_ATTR_LEGACY); > + } > } > if (num_channels == 1) > return result[0]; > > if (num_channels == 3) > result[num_channels++] = LLVMGetUndef(ctx->f32); > return ac_build_gather_values(ctx, result, num_channels); > } > > return ac_build_buffer_load_common(ctx, rsrc, vindex, offset, > diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > index 727def56f65..2f49685c642 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c > @@ -533,24 +533,24 @@ static void 
load_emit( > info->images_store | > info->images_atomic, > > info->uses_bindless_buffer_store | > > info->uses_bindless_buffer_atomic, > > info->uses_bindless_image_store | > > info->uses_bindless_image_atomic); > args.cache_policy = get_cache_policy(ctx, inst, false, false, false); > > if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { > /* Don't use SMEM for shader buffer loads, because LLVM > doesn't > - * select SMEM for SI.load.const with a non-constant offset, > and > + * select SMEM for amdgcn.s.buffer.load with a non-constant > offset, and > * constant offsets practically don't exist with shader > buffers. > * > - * Also, SI.load.const doesn't use inst_offset when it's > lowered > + * Also, amdgcn.s.buffer.load doesn't use inst_offset when > it's lowered > * to VMEM, so we just end up with more VALU instructions in > the end > * and no benefit. > * > * TODO: Remove this line once LLVM can select SMEM with a > non-constant > * offset, and can derive inst_offset when VMEM is > selected. > * After that, si_memory_barrier should invalidate sL1 > for shader > * buffers. > */ > emit_data->output[emit_data->chan] = > ac_build_buffer_load(&ctx->ac, args.resource, > -- > 2.17.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev