On Wed, Apr 11, 2018 at 4:25 PM, Juan A. Suarez Romero <jasua...@igalia.com> wrote: > Hi, Bas. > > Unfortunately, I can't apply this patch neither in next 17.3 release stable, > nor > in 18.0, as this patch does not apply cleanly, and it requires other different > commits that didn't land in the stable branches.
This backport (https://patchwork.freedesktop.org/patch/214949/) applies cleanly on 17.3 for me. > > > For 17.3 I think it is probably not worth to try to provide a specific version > for the stable, as I'm already cooking the pre-release, and this is the latest > release for 17.3 series. Can we please get this in 17.3? This has been submitted upstream laready for 2 weeks and this backport has been available for a week and fixes one of the major games from Feral on Vega: commit 4503ff760c794c3bb15b978a47c530037d56498e (dow3) Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Wed Mar 28 23:54:40 2018 +0200 Why did I not get a message that that commit could not be applied? > > > Maybe it is worth to provide a version for 18.0 branch, though. Probably it > wouldn't enter in this release, but surely in the next one which should happen > in 1 or 2 weeks. > > > For more information, for 18.0 this patch seems to require 1251f08ef ("ac: add > ac_build_buffer_load_common() helper"), bac9fa9f17f ("ac: add glc parameter to > ac_build_buffer_load_format"), and probably others. > > > J.A. > > > On Wed, 2018-04-11 at 11:26 +0200, Bas Nieuwenhuizen wrote: >> On Wed, Apr 4, 2018 at 10:19 PM, Bas Nieuwenhuizen >> <b...@basnieuwenhuizen.nl> wrote: >> > On GFX9 whether the buffer size is interpreted as elements or bytes >> > depends on whether IDXEN is enabled in the instruction. If the index >> > is a constant zero, LLVM optimizes IDXEN to 0. >> > >> > Now the size in elements is interpreted in bytes which of course >> > results in out of bounds accesses. >> > >> > The correct fix is most likely to disable the LLVM optimization, >> > but we need something to work with LLVM <= 6.0. >> > >> > radeonsi does the max between stride and element count on the CPU >> > but that results in the size intrinsics returning the wrong size >> > for the buffer. This would cause CTS errors for radv. >> > >> > v2: Also include the store changes. >> > >> > Fixes: e38685cc62e 'Revert "radv: disable support for VEGA for now."' >> > (backported from 4503ff760c794c3bb15b978a47c530037d56498e for 17.3) >> > --- >> > src/amd/common/ac_llvm_build.c | 20 ++++++++++++++++++++ >> > src/amd/common/ac_llvm_build.h | 8 ++++++++ >> > src/amd/common/ac_nir_to_llvm.c | 36 ++++++++++++++++++++++++++++++------ >> > src/amd/common/ac_shader_abi.h | 4 ++++ >> > 4 files changed, 62 insertions(+), 6 deletions(-) >> > >> > diff --git a/src/amd/common/ac_llvm_build.c >> > b/src/amd/common/ac_llvm_build.c >> > index e5cd23e025..f193f71c3e 100644 >> > --- a/src/amd/common/ac_llvm_build.c >> > +++ b/src/amd/common/ac_llvm_build.c >> > @@ -960,6 +960,26 @@ LLVMValueRef ac_build_buffer_load_format(struct >> > ac_llvm_context *ctx, >> > AC_FUNC_ATTR_READONLY); >> > } >> > >> > +LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context >> > *ctx, >> > + LLVMValueRef rsrc, >> > + LLVMValueRef vindex, >> > + LLVMValueRef voffset, >> > + bool can_speculate) >> > +{ >> > + LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, >> > rsrc, LLVMConstInt(ctx->i32, 2, 0), ""); >> > + LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, >> > LLVMConstInt(ctx->i32, 1, 0), ""); >> > + stride = LLVMBuildLShr(ctx->builder, stride, >> > LLVMConstInt(ctx->i32, 16, 0), ""); >> > + >> > + LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder, >> > + >> > LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""), >> > + elem_count, stride, >> > ""); >> > + >> > + LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, >> > new_elem_count, >> > + >> > LLVMConstInt(ctx->i32, 2, 0), ""); >> > + >> > + return ac_build_buffer_load_format(ctx, new_rsrc, vindex, voffset, >> > can_speculate); >> > +} >> > + >> > /** >> > * Set range metadata on an instruction. This can only be used on load >> > and >> > * call instructions. If you know an instruction can only produce the >> > values >> > diff --git a/src/amd/common/ac_llvm_build.h >> > b/src/amd/common/ac_llvm_build.h >> > index aa2a2899ab..d4264f2879 100644 >> > --- a/src/amd/common/ac_llvm_build.h >> > +++ b/src/amd/common/ac_llvm_build.h >> > @@ -188,6 +188,14 @@ LLVMValueRef ac_build_buffer_load_format(struct >> > ac_llvm_context *ctx, >> > LLVMValueRef voffset, >> > bool can_speculate); >> > >> > +/* load_format that handles the stride & element count better if idxen is >> > + * disabled by LLVM. */ >> > +LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context >> > *ctx, >> > + LLVMValueRef rsrc, >> > + LLVMValueRef vindex, >> > + LLVMValueRef voffset, >> > + bool can_speculate); >> > + >> > LLVMValueRef >> > ac_get_thread_id(struct ac_llvm_context *ctx); >> > >> > diff --git a/src/amd/common/ac_nir_to_llvm.c >> > b/src/amd/common/ac_nir_to_llvm.c >> > index b696cb8a45..8ee80b1a67 100644 >> > --- a/src/amd/common/ac_nir_to_llvm.c >> > +++ b/src/amd/common/ac_nir_to_llvm.c >> > @@ -2257,11 +2257,19 @@ static LLVMValueRef build_tex_intrinsic(struct >> > ac_nir_context *ctx, >> > struct ac_image_args *args) >> > { >> > if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { >> > - return ac_build_buffer_load_format(&ctx->ac, >> > - args->resource, >> > - args->addr, >> > - >> > LLVMConstInt(ctx->ac.i32, 0, false), >> > - true); >> > + if (ctx->abi->gfx9_stride_size_workaround) { >> > + return >> > ac_build_buffer_load_format_gfx9_safe(&ctx->ac, >> > + >> > args->resource, >> > + >> > args->addr, >> > + >> > ctx->ac.i32_0, >> > + true); >> > + } else { >> > + return ac_build_buffer_load_format(&ctx->ac, >> > + args->resource, >> > + args->addr, >> > + ctx->ac.i32_0, >> > + true); >> > + } >> > } >> > >> > args->opcode = ac_image_sample; >> > @@ -3613,8 +3621,23 @@ static void visit_image_store(struct ac_nir_context >> > *ctx, >> > glc = i1true; >> > >> > if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { >> > + LLVMValueRef rsrc = get_sampler_desc(ctx, >> > instr->variables[0], AC_DESC_BUFFER, true, true); >> > + >> > + if (ctx->abi->gfx9_stride_size_workaround) { >> > + LLVMValueRef elem_count = >> > LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, >> > 2, 0), ""); >> > + LLVMValueRef stride = >> > LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, >> > 1, 0), ""); >> > + stride = LLVMBuildLShr(ctx->ac.builder, stride, >> > LLVMConstInt(ctx->ac.i32, 16, 0), ""); >> > + >> > + LLVMValueRef new_elem_count = >> > LLVMBuildSelect(ctx->ac.builder, >> > + >> > LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""), >> > + >> > elem_count, stride, ""); >> > + >> > + rsrc = LLVMBuildInsertElement(ctx->ac.builder, >> > rsrc, new_elem_count, >> > + >> > LLVMConstInt(ctx->ac.i32, 2, 0), ""); >> > + } >> > + >> > params[0] = ac_to_float(&ctx->ac, get_src(ctx, >> > instr->src[2])); /* data */ >> > - params[1] = get_sampler_desc(ctx, instr->variables[0], >> > AC_DESC_BUFFER, true, true); >> > + params[1] = rsrc; >> > params[2] = LLVMBuildExtractElement(ctx->ac.builder, >> > get_src(ctx, instr->src[0]), >> > ctx->ac.i32_0, ""); /* >> > vindex */ >> > params[3] = ctx->ac.i32_0; /* voffset */ >> > @@ -6645,6 +6668,7 @@ LLVMModuleRef >> > ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, >> > ctx.abi.load_ssbo = radv_load_ssbo; >> > ctx.abi.load_sampler_desc = radv_get_sampler_desc; >> > ctx.abi.clamp_shadow_reference = false; >> > + ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9; >> > >> > if (shader_count >= 2) >> > ac_init_exec_full_mask(&ctx.ac); >> > diff --git a/src/amd/common/ac_shader_abi.h >> > b/src/amd/common/ac_shader_abi.h >> > index 14517d5570..b04dc076de 100644 >> > --- a/src/amd/common/ac_shader_abi.h >> > +++ b/src/amd/common/ac_shader_abi.h >> > @@ -92,6 +92,10 @@ struct ac_shader_abi { >> > /* Whether to clamp the shadow reference value to [0,1]on VI. >> > Radeonsi currently >> > * uses it due to promoting D16 to D32, but radv needs it off. */ >> > bool clamp_shadow_reference; >> > + >> > + /* Whether to workaround GFX9 ignoring the stride for the buffer >> > size if IDXEN=0 >> > + * and LLVM optimizes an indexed load with constant index to >> > IDXEN=0. */ >> > + bool gfx9_stride_size_workaround; >> > }; >> > >> > #endif /* AC_SHADER_ABI_H */ >> > -- >> > 2.16.2 >> > >> >> _______________________________________________ >> mesa-stable mailing list >> mesa-sta...@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-stable _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev