SI doesn't support buffer_load_dwordx3 and buffer_store_dwordx3, but it supports buffer_load_format_xyz and buffer_store_format_xyz.
Marek On Mon, Jun 3, 2019 at 9:09 AM Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > I thought LLVM was able to handle that itself but actually it > does not. That means we shouldn't try to emit vec3 on SI because > it's unsupported. > > Fixes: 6970a9a6ca9 ("ac,radv: remove the vec3 restriction with LLVM 9+")" > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/amd/common/ac_llvm_build.c | 12 ++++++------ > src/amd/common/ac_llvm_util.h | 9 +++++++++ > src/amd/common/ac_nir_to_llvm.c | 3 ++- > src/amd/vulkan/radv_nir_to_llvm.c | 2 +- > 4 files changed, 18 insertions(+), 8 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c > b/src/amd/common/ac_llvm_build.c > index 613c1eef942..7f5c8ef873c 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -1167,7 +1167,7 @@ ac_build_llvm8_buffer_store_common(struct > ac_llvm_context *ctx, > args[idx++] = voffset ? voffset : ctx->i32_0; > args[idx++] = soffset ? soffset : ctx->i32_0; > args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : > 0), 0); > - unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : > num_channels; > + unsigned func = !ac_has_vec3_support(ctx->chip_class) && > num_channels == 3 ? 4 : num_channels; > const char *indexing_kind = structurized ? "struct" : "raw"; > char name[256], type_name[8]; > > @@ -1227,7 +1227,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context > *ctx, > { > /* Split 3 channel stores, because only LLVM 9+ support 3-channel > * intrinsics. */ > - if (num_channels == 3 && HAVE_LLVM < 0x900) { > + if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class)) { > LLVMValueRef v[3], v01; > > for (int i = 0; i < 3; i++) { > @@ -1354,7 +1354,7 @@ ac_build_llvm8_buffer_load_common(struct > ac_llvm_context *ctx, > args[idx++] = voffset ? voffset : ctx->i32_0; > args[idx++] = soffset ? soffset : ctx->i32_0; > args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : > 0), 0); > - unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : > num_channels; > + unsigned func = !ac_has_vec3_support(ctx->chip_class) && > num_channels == 3 ? 4 : num_channels; > const char *indexing_kind = structurized ? "struct" : "raw"; > char name[256], type_name[8]; > > @@ -1420,7 +1420,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, > if (num_channels == 1) > return result[0]; > > - if (num_channels == 3 && HAVE_LLVM < 0x900) > + if (num_channels == 3 && > !ac_has_vec3_support(ctx->chip_class)) > result[num_channels++] = LLVMGetUndef(ctx->f32); > return ac_build_gather_values(ctx, result, num_channels); > } > @@ -1512,7 +1512,7 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context > *ctx, > args[idx++] = soffset ? soffset : ctx->i32_0; > args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); > args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : > 0), 0); > - unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : > num_channels; > + unsigned func = !ac_has_vec3_support(ctx->chip_class) && > num_channels == 3 ? 4 : num_channels; > const char *indexing_kind = structurized ? "struct" : "raw"; > char name[256], type_name[8]; > > @@ -2011,7 +2011,7 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context > *ctx, > args[idx++] = soffset ? soffset : ctx->i32_0; > args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); > args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : > 0), 0); > - unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4 : > num_channels; > + unsigned func = !ac_has_vec3_support(ctx->chip_class) && > num_channels == 3 ? 4 : num_channels; > const char *indexing_kind = structurized ? "struct" : "raw"; > char name[256], type_name[8]; > > diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h > index ca00540da80..a45647a3360 100644 > --- a/src/amd/common/ac_llvm_util.h > +++ b/src/amd/common/ac_llvm_util.h > @@ -146,6 +146,15 @@ bool ac_compile_module_to_binary(struct > ac_compiler_passes *p, LLVMModuleRef mod > void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr); > void ac_enable_global_isel(LLVMTargetMachineRef tm); > > +static inline bool > +ac_has_vec3_support(enum chip_class chip) > +{ > + if (chip == GFX6) > + return false; > + > + return HAVE_LLVM >= 0x900; > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/src/amd/common/ac_nir_to_llvm.c > b/src/amd/common/ac_nir_to_llvm.c > index 51f92a6b062..429dac63d63 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -1576,7 +1576,8 @@ static void visit_store_ssbo(struct ac_nir_context > *ctx, > > /* Due to an LLVM limitation with LLVM < 9, split 3-element > * writes into a 2-element and a 1-element write. */ > - if (count == 3 && (elem_size_bytes != 4 || HAVE_LLVM < > 0x900)) { > + if (count == 3 && > + (elem_size_bytes != 4 || > !ac_has_vec3_support(ctx->ac.chip_class))) { > writemask |= 1 << (start + 2); > count = 2; > } > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > b/src/amd/vulkan/radv_nir_to_llvm.c > index dca4bebcdd1..ab552ae34ab 100644 > --- a/src/amd/vulkan/radv_nir_to_llvm.c > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > @@ -2768,7 +2768,7 @@ radv_emit_stream_output(struct radv_shader_context > *ctx, > /* fall through */ > case 4: /* as v4i32 */ > vdata = ac_build_gather_values(&ctx->ac, out, > - HAVE_LLVM < 0x900 ? > + > !ac_has_vec3_support(ctx->ac.chip_class) ? > > util_next_power_of_two(num_comps) : > num_comps); > break; > -- > 2.21.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev