From: Marek Olšák <marek.ol...@amd.com> Users are encouraged to switch to LLVM 6.0 released in March 2018. --- .travis.yml | 24 ++-- configure.ac | 4 +- meson.build | 4 +- src/amd/common/ac_llvm_build.c | 129 +++++----------------- src/amd/common/ac_llvm_helper.cpp | 4 - src/gallium/drivers/radeonsi/si_shader.c | 24 ---- src/gallium/drivers/radeonsi/si_shader_nir.c | 7 -- src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 8 +- 8 files changed, 45 insertions(+), 159 deletions(-)
diff --git a/.travis.yml b/.travis.yml index 012cc91..2f04747 100644 --- a/.travis.yml +++ b/.travis.yml @@ -118,37 +118,37 @@ matrix: - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev - libunwind8-dev - env: - LABEL="make Gallium Drivers RadeonSI" - BUILD=make - MAKEFLAGS="-j4" - MAKE_CHECK_COMMAND="true" - - LLVM_VERSION=5.0 + - LLVM_VERSION=6.0 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" - GALLIUM_DRIVERS="radeonsi" - VULKAN_DRIVERS="" - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: sources: - - llvm-toolchain-trusty-5.0 + - llvm-toolchain-trusty-6.0 packages: # LLVM packaging is broken and misses these dependencies - libedit-dev # From sources above - - llvm-5.0-dev + - llvm-6.0-dev # Common - xz-utils - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev - libunwind8-dev - env: - LABEL="make Gallium Drivers Other" - BUILD=make @@ -251,47 +251,47 @@ matrix: - libclang-4.0-dev # Common - xz-utils - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev - libunwind8-dev - env: # NOTE: Analogous to SWR above, building Clover is quite slow. - - LABEL="make Gallium ST Clover LLVM-5.0" + - LABEL="make Gallium ST Clover LLVM-6.0" - BUILD=make - MAKEFLAGS="-j4" - MAKE_CHECK_COMMAND="true" - - LLVM_VERSION=5.0 + - LLVM_VERSION=6.0 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - OVERRIDE_CC=gcc-4.8 - OVERRIDE_CXX=g++-4.8 - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" - GALLIUM_DRIVERS="r600,radeonsi" - VULKAN_DRIVERS="" - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: sources: - - llvm-toolchain-trusty-5.0 + - llvm-toolchain-trusty-6.0 packages: - libclc-dev # LLVM packaging is broken and misses these dependencies - libedit-dev - g++-4.8 # From sources above - - llvm-5.0-dev - - clang-5.0 - - libclang-5.0-dev + - llvm-6.0-dev + - clang-6.0 + - libclang-6.0-dev # Common - xz-utils - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev - libunwind8-dev - env: # NOTE: Analogous to SWR above, building Clover is quite slow. - LABEL="make Gallium ST Clover LLVM-6.0" @@ -359,37 +359,37 @@ matrix: - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev - libunwind8-dev - env: - LABEL="make Vulkan" - BUILD=make - MAKEFLAGS="-j4" - MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check" - - LLVM_VERSION=5.0 + - LLVM_VERSION=6.0 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl --with-platforms=x11,wayland" - DRI_DRIVERS="" - GALLIUM_ST="--enable-dri --enable-dri3 --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" - GALLIUM_DRIVERS="" - VULKAN_DRIVERS="intel,radeon" - LIBUNWIND_FLAGS="--disable-libunwind" addons: apt: sources: - - llvm-toolchain-trusty-5.0 + - llvm-toolchain-trusty-6.0 packages: # LLVM packaging is broken and misses these dependencies - libedit-dev # From sources above - - llvm-5.0-dev + - llvm-6.0-dev # Common - xz-utils - x11proto-xf86vidmode-dev - libexpat1-dev - libx11-xcb-dev - libelf-dev - env: - LABEL="scons" - BUILD=scons - SCONSFLAGS="-j4" diff --git a/configure.ac b/configure.ac index f135d05..58cf56e 100644 --- a/configure.ac +++ b/configure.ac @@ -101,22 +101,22 @@ XDAMAGE_REQUIRED=1.1 XSHMFENCE_REQUIRED=1.1 XVMC_REQUIRED=1.0.6 PYTHON_MAKO_REQUIRED=0.8.0 LIBSENSORS_REQUIRED=4.0.0 ZLIB_REQUIRED=1.2.3 dnl LLVM versions LLVM_REQUIRED_GALLIUM=3.3.0 LLVM_REQUIRED_OPENCL=3.9.0 LLVM_REQUIRED_R600=3.9.0 -LLVM_REQUIRED_RADEONSI=5.0.0 -LLVM_REQUIRED_RADV=5.0.0 +LLVM_REQUIRED_RADEONSI=6.0.0 +LLVM_REQUIRED_RADV=6.0.0 LLVM_REQUIRED_SWR=5.0.0 dnl Check for progs AC_PROG_CPP AC_PROG_CC AC_PROG_CXX dnl add this here, so the help for this environmnet variable is close to dnl other CC/CXX flags related help AC_ARG_VAR([CXX11_CXXFLAGS], [Compiler flag to enable C++11 support (only needed if not enabled by default and different from -std=c++11)]) diff --git a/meson.build b/meson.build index b2722c7..9b5b53e 100644 --- a/meson.build +++ b/meson.build @@ -1140,21 +1140,23 @@ if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 endif endif if with_gallium_opencl llvm_modules += [ 'all-targets', 'linker', 'coverage', 'instrumentation', 'ipo', 'irreader', 'lto', 'option', 'objcarcopts', 'profiledata', ] llvm_optional_modules += ['coroutines', 'opencl'] endif -if with_amd_vk or with_gallium_radeonsi or with_gallium_swr +if with_amd_vk or with_gallium_radeonsi + _llvm_version = '>= 6.0.0' +elif with_gallium_swr _llvm_version = '>= 5.0.0' elif with_gallium_opencl or with_gallium_r600 _llvm_version = '>= 3.9.0' else _llvm_version = '>= 3.3.0' endif _llvm = get_option('llvm') if _llvm == 'auto' dep_llvm = dependency( diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 93ae273..d2ada00 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1819,189 +1819,110 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, { LLVMTypeRef v2f16 = LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2); LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", v2f16, args, 2, AC_FUNC_ATTR_READNONE); return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } -/* Upper 16 bits must be zero. */ -static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx, - LLVMValueRef val[2]) -{ - return LLVMBuildOr(ctx->builder, val[0], - LLVMBuildShl(ctx->builder, val[1], - LLVMConstInt(ctx->i32, 16, 0), - ""), ""); -} - -/* Upper 16 bits are ignored and will be dropped. */ -static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct ac_llvm_context *ctx, - LLVMValueRef val[2]) -{ - LLVMValueRef v[2] = { - LLVMBuildAnd(ctx->builder, val[0], - LLVMConstInt(ctx->i32, 0xffff, 0), ""), - val[1], - }; - return ac_llvm_pack_two_int16(ctx, v); -} - LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) { - if (HAVE_LLVM >= 0x0600) { - LLVMValueRef res = - ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", - ctx->v2i16, args, 2, - AC_FUNC_ATTR_READNONE); - return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); - } - - LLVMValueRef val[2]; - - for (int chan = 0; chan < 2; chan++) { - /* Clamp between [-1, 1]. */ - val[chan] = ac_build_fmin(ctx, args[chan], ctx->f32_1); - val[chan] = ac_build_fmax(ctx, val[chan], LLVMConstReal(ctx->f32, -1)); - /* Convert to a signed integer in [-32767, 32767]. */ - val[chan] = LLVMBuildFMul(ctx->builder, val[chan], - LLVMConstReal(ctx->f32, 32767), ""); - /* If positive, add 0.5, else add -0.5. */ - val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], - LLVMBuildSelect(ctx->builder, - LLVMBuildFCmp(ctx->builder, LLVMRealOGE, - val[chan], ctx->f32_0, ""), - LLVMConstReal(ctx->f32, 0.5), - LLVMConstReal(ctx->f32, -0.5), ""), ""); - val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, ""); - } - return ac_llvm_pack_two_int32_as_int16(ctx, val); + LLVMValueRef res = + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", + ctx->v2i16, args, 2, + AC_FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) { - if (HAVE_LLVM >= 0x0600) { - LLVMValueRef res = - ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", - ctx->v2i16, args, 2, - AC_FUNC_ATTR_READNONE); - return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); - } - - LLVMValueRef val[2]; - - for (int chan = 0; chan < 2; chan++) { - val[chan] = ac_build_clamp(ctx, args[chan]); - val[chan] = LLVMBuildFMul(ctx->builder, val[chan], - LLVMConstReal(ctx->f32, 65535), ""); - val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], - LLVMConstReal(ctx->f32, 0.5), ""); - val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan], - ctx->i32, ""); - } - return ac_llvm_pack_two_int32_as_int16(ctx, val); + LLVMValueRef res = + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", + ctx->v2i16, args, 2, + AC_FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } /* The 8-bit and 10-bit clamping is for HW workarounds. */ LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, bool hi) { assert(bits == 8 || bits == 10 || bits == 16); LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0); LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0); LLVMValueRef max_alpha = bits != 10 ? max_rgb : ctx->i32_1; LLVMValueRef min_alpha = bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0); - bool has_intrinsic = HAVE_LLVM >= 0x0600; /* Clamp. */ - if (!has_intrinsic || bits != 16) { + if (bits != 16) { for (int i = 0; i < 2; i++) { bool alpha = hi && i == 1; args[i] = ac_build_imin(ctx, args[i], alpha ? max_alpha : max_rgb); args[i] = ac_build_imax(ctx, args[i], alpha ? min_alpha : min_rgb); } } - if (has_intrinsic) { - LLVMValueRef res = - ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", - ctx->v2i16, args, 2, - AC_FUNC_ATTR_READNONE); - return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); - } - - return ac_llvm_pack_two_int32_as_int16(ctx, args); + LLVMValueRef res = + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", + ctx->v2i16, args, 2, + AC_FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } /* The 8-bit and 10-bit clamping is for HW workarounds. */ LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits, bool hi) { assert(bits == 8 || bits == 10 || bits == 16); LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0); LLVMValueRef max_alpha = bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0); - bool has_intrinsic = HAVE_LLVM >= 0x0600; /* Clamp. */ - if (!has_intrinsic || bits != 16) { + if (bits != 16) { for (int i = 0; i < 2; i++) { bool alpha = hi && i == 1; args[i] = ac_build_umin(ctx, args[i], alpha ? max_alpha : max_rgb); } } - if (has_intrinsic) { - LLVMValueRef res = - ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", - ctx->v2i16, args, 2, - AC_FUNC_ATTR_READNONE); - return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); - } - - return ac_llvm_pack_two_int16(ctx, args); + LLVMValueRef res = + ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", + ctx->v2i16, args, 2, + AC_FUNC_ATTR_READNONE); + return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1) { - assert(HAVE_LLVM >= 0x0600); return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, &i1, 1, AC_FUNC_ATTR_READNONE); } void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1) { - if (HAVE_LLVM >= 0x0600) { - ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, - &i1, 1, 0); - return; - } - - LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1, - LLVMConstReal(ctx->f32, 1), - LLVMConstReal(ctx->f32, -1), ""); - ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt, - &value, 1, AC_FUNC_ATTR_LEGACY); + ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, + &i1, 1, 0); } LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset, LLVMValueRef width, bool is_signed) { LLVMValueRef args[] = { input, offset, width, @@ -3196,21 +3117,21 @@ ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsign result = ac_build_alu_op(ctx, result, swap, op); return ac_build_wwm(ctx, result); } } LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) { unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3); - if (ctx->chip_class >= VI && HAVE_LLVM >= 0x0600) { + if (ctx->chip_class >= VI) { return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false); } else { return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask); } } LLVMValueRef ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index) { index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp index 1a2aee3..f715c8f 100644 --- a/src/amd/common/ac_llvm_helper.cpp +++ b/src/amd/common/ac_llvm_helper.cpp @@ -69,21 +69,17 @@ LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, llvm::FastMathFlags flags; switch (float_mode) { case AC_FLOAT_MODE_DEFAULT: break; case AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH: flags.setNoSignedZeros(); llvm::unwrap(builder)->setFastMathFlags(flags); break; case AC_FLOAT_MODE_UNSAFE_FP_MATH: -#if HAVE_LLVM >= 0x0600 flags.setFast(); -#else - flags.setUnsafeAlgebra(); -#endif llvm::unwrap(builder)->setFastMathFlags(flags); break; } return builder; } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 32db089..7d2fc44 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2443,44 +2443,20 @@ static LLVMValueRef fetch_constant( idx = reg->Register.Index * 4 + swizzle; if (reg->Register.Indirect) { addr = si_get_indirect_index(ctx, ireg, 16, idx * 4); } else { addr = LLVMConstInt(ctx->i32, idx * 4, 0); } /* Fast path when user data SGPRs point to constant buffer 0 directly. */ if (sel->info.const_buffers_declared == 1 && sel->info.shader_buffers_declared == 0) { - - /* This enables use of s_load_dword and flat_load_dword for const buffer 0 - * loads, and up to x4 load opcode merging. However, it leads to horrible - * code reducing SIMD wave occupancy from 8 to 2 in many cases. - * - * Using s_buffer_load_dword (x1) seems to be the best option right now. - * - * LLVM 5.0 on SI doesn't insert a required s_nop between SALU setting - * a descriptor and s_buffer_load_dword using it, so we can't expand - * the pointer into a full descriptor like below. We have to use - * s_load_dword instead. The only case when LLVM 5.0 would select - * s_buffer_load_dword (that we have to prevent) is when we use use - * a literal offset where we don't need bounds checking. - */ - if (ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600 && - !reg->Register.Indirect) { - LLVMValueRef ptr = - LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers); - - addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), ""); - LLVMValueRef result = ac_build_load_invariant(&ctx->ac, ptr, addr); - return bitcast(bld_base, type, result); - } - LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx); LLVMValueRef result = buffer_load_const(ctx, desc, addr); return bitcast(bld_base, type, result); } assert(reg->Register.Dimension); buf = reg->Dimension.Index; if (reg->Dimension.Indirect) { LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 6eb114a..914b901 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -741,27 +741,20 @@ void si_nir_scan_shader(const struct nir_shader *nir, } } /** * Perform "lowering" operations on the NIR that are run once when the shader * selector is created. */ void si_lower_nir(struct si_shader_selector* sel) { - /* Disable const buffer fast path for old LLVM versions */ - if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600 && - sel->info.const_buffers_declared == 1 && - sel->info.shader_buffers_declared == 0) { - sel->info.const_buffers_declared |= 0x2; - } - /* Adjust the driver location of inputs and outputs. The state tracker * interprets them as slots, while the ac/nir backend interprets them * as individual components. */ nir_foreach_variable(variable, &sel->nir->inputs) variable->data.driver_location *= 4; nir_foreach_variable(variable, &sel->nir->outputs) { variable->data.driver_location *= 4; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 43922dc..694f934 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -49,25 +49,23 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base, emit_data->arg_count = 1; emit_data->args[0] = conds[0]; } void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); LLVMBuilderRef builder = ctx->ac.builder; if (ctx->shader->selector->force_correct_derivs_after_kill) { - /* LLVM 6.0 can kill immediately while maintaining WQM. */ - if (HAVE_LLVM >= 0x0600) { - ac_build_kill_if_false(&ctx->ac, - ac_build_wqm_vote(&ctx->ac, visible)); - } + /* Kill immediately while maintaining WQM. */ + ac_build_kill_if_false(&ctx->ac, + ac_build_wqm_vote(&ctx->ac, visible)); LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, ""); mask = LLVMBuildAnd(builder, mask, visible, ""); LLVMBuildStore(builder, mask, ctx->postponed_kill); return; } ac_build_kill_if_false(&ctx->ac, visible); } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev