On Wed, Jan 11, 2017 at 1:45 AM, Grazvydas Ignotas <nota...@gmail.com> wrote: > Unfortunately this one breaks at least (surprise!) texturecubemap > SaschaWillemsVulkan demo. > I recommend you try it yourself, there are even precompiled binaries > available (see README.md): > https://github.com/SaschaWillems/Vulkan
As far as I can see reverting the offset back to 1.5 (as Nicolai noted on patch 1) also fixes that vulkan demo. - Bas > > Gražvydas > > On Tue, Jan 10, 2017 at 5:12 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: >> From: Nicolai Hähnle <nicolai.haeh...@amd.com> >> >> Code is taken from a combination of radv (for the more basic functions, >> to avoid gallivm dependencies) and radeonsi (for the new and improved >> derivative calculations). >> --- >> src/amd/common/ac_llvm_util.c | 362 >> +++++++++++++++++++++ >> src/amd/common/ac_llvm_util.h | 57 ++++ >> src/amd/common/ac_nir_to_llvm.c | 204 +----------- >> src/gallium/drivers/radeonsi/si_shader.c | 6 +- >> src/gallium/drivers/radeonsi/si_shader_internal.h | 2 + >> .../drivers/radeonsi/si_shader_tgsi_setup.c | 4 + >> 6 files changed, 438 insertions(+), 197 deletions(-) >> >> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c >> index a8408dd..6dd6cfa 100644 >> --- a/src/amd/common/ac_llvm_util.c >> +++ b/src/amd/common/ac_llvm_util.c >> @@ -25,20 +25,23 @@ >> /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ >> #include "ac_llvm_util.h" >> >> #include <llvm-c/Core.h> >> >> #include "c11/threads.h" >> >> #include <assert.h> >> #include <stdio.h> >> >> +#include "util/bitscan.h" >> +#include "util/macros.h" >> + >> static void ac_init_llvm_target() >> { >> #if HAVE_LLVM < 0x0307 >> LLVMInitializeR600TargetInfo(); >> LLVMInitializeR600Target(); >> LLVMInitializeR600TargetMC(); >> LLVMInitializeR600AsmPrinter(); >> #else >> LLVMInitializeAMDGPUTargetInfo(); >> LLVMInitializeAMDGPUTarget(); >> @@ -133,10 +136,369 @@ LLVMTargetMachineRef ac_create_target_machine(enum >> radeon_family family) >> target, >> triple, >> ac_get_llvm_processor_name(family), >> "+DumpCode,+vgpr-spilling", >> LLVMCodeGenLevelDefault, >> LLVMRelocDefault, >> LLVMCodeModelDefault); >> >> return tm; >> } >> + >> +/* Initialize module-independent parts of the context. 
>> + * >> + * The caller is responsible for initializing ctx::module and ctx::builder. >> + */ >> +void >> +ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context) >> +{ >> + LLVMValueRef args[1]; >> + >> + ctx->context = context; >> + ctx->module = NULL; >> + ctx->builder = NULL; >> + >> + ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); >> + ctx->f32 = LLVMFloatTypeInContext(ctx->context); >> + >> + ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, >> "fpmath", 6); >> + >> + args[0] = LLVMConstReal(ctx->f32, 2.5); >> + ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1); >> +} >> + >> +#if HAVE_LLVM < 0x0400 >> +static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) >> +{ >> + switch (attr) { >> + case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; >> + case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute; >> + case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute; >> + case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute; >> + case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; >> + case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; >> + case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; >> + default: >> + fprintf(stderr, "Unhandled function attribute: %x\n", attr); >> + return 0; >> + } >> +} >> + >> +#else >> + >> +static const char *attr_to_str(enum ac_func_attr attr) >> +{ >> + switch (attr) { >> + case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; >> + case AC_FUNC_ATTR_BYVAL: return "byval"; >> + case AC_FUNC_ATTR_INREG: return "inreg"; >> + case AC_FUNC_ATTR_NOALIAS: return "noalias"; >> + case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; >> + case AC_FUNC_ATTR_READNONE: return "readnone"; >> + case AC_FUNC_ATTR_READONLY: return "readonly"; >> + default: >> + fprintf(stderr, "Unhandled function attribute: %x\n", attr); >> + return 0; >> + } >> +} >> + >> +#endif >> + >> +void >> +ac_add_function_attr(LLVMValueRef function, >> + int attr_idx, >> + enum ac_func_attr 
attr) >> +{ >> + >> +#if HAVE_LLVM < 0x0400 >> + LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); >> + if (attr_idx == -1) { >> + LLVMAddFunctionAttr(function, llvm_attr); >> + } else { >> + LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); >> + } >> +#else >> + LLVMContextRef context = >> LLVMGetModuleContext(LLVMGetGlobalParent(function)); >> + const char *attr_name = attr_to_str(attr); >> + unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, >> + strlen(attr_name)); >> + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, >> 0); >> + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); >> +#endif >> +} >> + >> +LLVMValueRef >> +ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, >> + LLVMTypeRef return_type, LLVMValueRef *params, >> + unsigned param_count, unsigned attrib_mask) >> +{ >> + LLVMValueRef function; >> + >> + function = LLVMGetNamedFunction(ctx->module, name); >> + if (!function) { >> + LLVMTypeRef param_types[32], function_type; >> + unsigned i; >> + >> + assert(param_count <= 32); >> + >> + for (i = 0; i < param_count; ++i) { >> + assert(params[i]); >> + param_types[i] = LLVMTypeOf(params[i]); >> + } >> + function_type = >> + LLVMFunctionType(return_type, param_types, param_count, >> 0); >> + function = LLVMAddFunction(ctx->module, name, function_type); >> + >> + LLVMSetFunctionCallConv(function, LLVMCCallConv); >> + LLVMSetLinkage(function, LLVMExternalLinkage); >> + >> + attrib_mask |= AC_FUNC_ATTR_NOUNWIND; >> + while (attrib_mask) { >> + enum ac_func_attr attr = 1u << >> u_bit_scan(&attrib_mask); >> + ac_add_function_attr(function, -1, attr); >> + } >> + } >> + return LLVMBuildCall(ctx->builder, function, params, param_count, >> ""); >> +} >> + >> +LLVMValueRef >> +ac_build_gather_values_extended(struct ac_llvm_context *ctx, >> + LLVMValueRef *values, >> + unsigned value_count, >> + unsigned value_stride, >> + bool load) >> +{ >> + LLVMBuilderRef builder = ctx->builder; >> 
+ LLVMValueRef vec; >> + unsigned i; >> + >> + >> + if (value_count == 1) { >> + if (load) >> + return LLVMBuildLoad(builder, values[0], ""); >> + return values[0]; >> + } else if (!value_count) >> + unreachable("value_count is 0"); >> + >> + for (i = 0; i < value_count; i++) { >> + LLVMValueRef value = values[i * value_stride]; >> + if (load) >> + value = LLVMBuildLoad(builder, value, ""); >> + >> + if (!i) >> + vec = LLVMGetUndef( >> LLVMVectorType(LLVMTypeOf(value), value_count)); >> + LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); >> + vec = LLVMBuildInsertElement(builder, vec, value, index, ""); >> + } >> + return vec; >> +} >> + >> +LLVMValueRef >> +ac_build_gather_values(struct ac_llvm_context *ctx, >> + LLVMValueRef *values, >> + unsigned value_count) >> +{ >> + return ac_build_gather_values_extended(ctx, values, value_count, 1, >> false); >> +} >> + >> +LLVMValueRef >> +ac_emit_fdiv(struct ac_llvm_context *ctx, >> + LLVMValueRef num, >> + LLVMValueRef den) >> +{ >> + LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, ""); >> + >> + if (!LLVMIsConstant(ret)) >> + LLVMSetMetadata(ret, ctx->fpmath_md_kind, >> ctx->fpmath_md_2p5_ulp); >> + return ret; >> +} >> + >> +/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 >> + * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is >> + * already multiplied by two. id is the cube face number. 
>> + */ >> +struct cube_selection_coords { >> + LLVMValueRef stc[2]; >> + LLVMValueRef ma; >> + LLVMValueRef id; >> +}; >> + >> +static void >> +build_cube_intrinsic(struct ac_llvm_context *ctx, >> + LLVMValueRef in[3], >> + struct cube_selection_coords *out) >> +{ >> + LLVMBuilderRef builder = ctx->builder; >> + >> + if (HAVE_LLVM >= 0x0309) { >> + LLVMTypeRef f32 = ctx->f32; >> + >> + out->stc[1] = ac_emit_llvm_intrinsic(ctx, >> "llvm.amdgcn.cubetc", >> + f32, in, 3, AC_FUNC_ATTR_READNONE); >> + out->stc[0] = ac_emit_llvm_intrinsic(ctx, >> "llvm.amdgcn.cubesc", >> + f32, in, 3, AC_FUNC_ATTR_READNONE); >> + out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema", >> + f32, in, 3, AC_FUNC_ATTR_READNONE); >> + out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid", >> + f32, in, 3, AC_FUNC_ATTR_READNONE); >> + } else { >> + LLVMValueRef c[4] = { >> + in[0], >> + in[1], >> + in[2], >> + LLVMGetUndef(LLVMTypeOf(in[0])) >> + }; >> + LLVMValueRef vec = ac_build_gather_values(ctx, c, 4); >> + >> + LLVMValueRef tmp = >> + ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", >> + LLVMTypeOf(vec), &vec, 1, >> + AC_FUNC_ATTR_READNONE); >> + >> + out->stc[1] = LLVMBuildExtractElement(builder, tmp, >> + LLVMConstInt(ctx->i32, 0, 0), ""); >> + out->stc[0] = LLVMBuildExtractElement(builder, tmp, >> + LLVMConstInt(ctx->i32, 1, 0), ""); >> + out->ma = LLVMBuildExtractElement(builder, tmp, >> + LLVMConstInt(ctx->i32, 2, 0), ""); >> + out->id = LLVMBuildExtractElement(builder, tmp, >> + LLVMConstInt(ctx->i32, 3, 0), ""); >> + } >> +} >> + >> +/** >> + * Build a manual selection sequence for cube face sc/tc coordinates and >> + * major axis vector (multiplied by 2 for consistency) for the given >> + * vec3 \p coords, for the face implied by \p selcoords. >> + * >> + * For the major axis, we always adjust the sign to be in the direction of >> + * selcoords.ma; i.e., a positive out_ma means that coords is pointed >> towards >> + * the selcoords major axis. 
>> + */ >> +static void build_cube_select(LLVMBuilderRef builder, >> + const struct cube_selection_coords *selcoords, >> + const LLVMValueRef *coords, >> + LLVMValueRef *out_st, >> + LLVMValueRef *out_ma) >> +{ >> + LLVMTypeRef f32 = LLVMTypeOf(coords[0]); >> + LLVMValueRef is_ma_positive; >> + LLVMValueRef sgn_ma; >> + LLVMValueRef is_ma_z, is_not_ma_z; >> + LLVMValueRef is_ma_y; >> + LLVMValueRef is_ma_x; >> + LLVMValueRef sgn; >> + LLVMValueRef tmp; >> + >> + is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, >> + selcoords->ma, LLVMConstReal(f32, 0.0), ""); >> + sgn_ma = LLVMBuildSelect(builder, is_ma_positive, >> + LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); >> + >> + is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, >> LLVMConstReal(f32, 4.0), ""); >> + is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); >> + is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, >> + LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, >> LLVMConstReal(f32, 2.0), ""), ""); >> + is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, >> is_ma_y, ""), ""); >> + >> + /* Select sc */ >> + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], ""); >> + sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), >> + LLVMBuildSelect(builder, is_ma_x, sgn_ma, >> + LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); >> + out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); >> + >> + /* Select tc */ >> + tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); >> + sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, >> sgn_ma, ""), >> + LLVMConstReal(f32, -1.0), ""); >> + out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); >> + >> + /* Select ma */ >> + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], >> + LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), >> ""); >> + sgn = LLVMBuildSelect(builder, is_ma_positive, >> + LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), ""); >> + *out_ma = LLVMBuildFMul(builder, tmp, sgn, ""); 
>> +} >> + >> +void >> +ac_prepare_cube_coords(struct ac_llvm_context *ctx, >> + bool is_deriv, bool is_array, >> + LLVMValueRef *coords_arg, >> + LLVMValueRef *derivs_arg) >> +{ >> + >> + LLVMBuilderRef builder = ctx->builder; >> + struct cube_selection_coords selcoords; >> + LLVMValueRef coords[4]; >> + LLVMValueRef invma; >> + >> + build_cube_intrinsic(ctx, coords_arg, &selcoords); >> + >> + invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32", >> + ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); >> + invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); >> + >> + for (int i = 0; i < 2; ++i) { >> + coords[i] = >> + LLVMBuildFAdd(builder, >> + LLVMBuildFMul(builder, selcoords.stc[i], >> invma, ""), >> + LLVMConstReal(ctx->f32, 0.5), ""); >> + } >> + >> + coords[2] = selcoords.id; >> + >> + if (is_deriv && derivs_arg) { >> + LLVMValueRef derivs[4]; >> + int axis; >> + >> + /* Convert cube derivatives to 2D derivatives. */ >> + for (axis = 0; axis < 2; axis++) { >> + LLVMValueRef deriv_st[2]; >> + LLVMValueRef deriv_ma; >> + >> + /* Transform the derivative alongside the texture >> + * coordinate. Mathematically, the correct formula is >> + * as follows. Assume we're projecting onto the +Z >> face >> + * and denote by dx/dh the derivative of the >> (original) >> + * X texture coordinate with respect to horizontal >> + * window coordinates. The projection onto the +Z >> face >> + * plane is: >> + * >> + * f(x,z) = x/z >> + * >> + * Then df/dh = df/dx * dx/dh + df/dz * dz/dh >> + * = 1/z * dx/dh - x/z * 1/z * dz/dh. >> + * >> + * This motivates the implementation below. >> + * >> + * Whether this actually gives the expected results >> for >> + * apps that might feed in derivatives obtained via >> + * finite differences is anyone's guess. The OpenGL >> spec >> + * seems awfully quiet about how textureGrad for cube >> + * maps should be handled. 
>> + */ >> + build_cube_select(builder, &selcoords, >> &derivs_arg[axis * 3], >> + deriv_st, &deriv_ma); >> + >> + deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, >> ""); >> + >> + for (int i = 0; i < 2; ++i) >> + derivs[axis * 2 + i] = >> + LLVMBuildFSub(builder, >> + LLVMBuildFMul(builder, >> deriv_st[i], invma, ""), >> + LLVMBuildFMul(builder, >> deriv_ma, coords[i], ""), ""); >> + } >> + >> + memcpy(derivs_arg, derivs, sizeof(derivs)); >> + } >> + >> + if (is_array) { >> + /* for cube arrays coord.z = coord.w(array_index) * 8 + face >> */ >> + /* coords_arg.w component - array_index for cube arrays */ >> + LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, >> coords_arg[3], LLVMConstReal(ctx->f32, 8.0), ""); >> + coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); >> + } >> + >> + memcpy(coords_arg, coords, sizeof(coords)); >> +} >> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h >> index d9ea9bd..802c266 100644 >> --- a/src/amd/common/ac_llvm_util.h >> +++ b/src/amd/common/ac_llvm_util.h >> @@ -26,18 +26,75 @@ >> >> #include <stdbool.h> >> #include <llvm-c/TargetMachine.h> >> >> #include "amd_family.h" >> >> #ifdef __cplusplus >> extern "C" { >> #endif >> >> +enum ac_func_attr { >> + AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), >> + AC_FUNC_ATTR_BYVAL = (1 << 1), >> + AC_FUNC_ATTR_INREG = (1 << 2), >> + AC_FUNC_ATTR_NOALIAS = (1 << 3), >> + AC_FUNC_ATTR_NOUNWIND = (1 << 4), >> + AC_FUNC_ATTR_READNONE = (1 << 5), >> + AC_FUNC_ATTR_READONLY = (1 << 6), >> + AC_FUNC_ATTR_LAST = (1 << 7) >> +}; >> + >> +struct ac_llvm_context { >> + LLVMContextRef context; >> + LLVMModuleRef module; >> + LLVMBuilderRef builder; >> + >> + LLVMTypeRef i32; >> + LLVMTypeRef f32; >> + >> + unsigned fpmath_md_kind; >> + LLVMValueRef fpmath_md_2p5_ulp; >> +}; >> + >> LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family); >> >> void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); >> bool ac_is_sgpr_param(LLVMValueRef param); 
>> >> +void >> +ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context); >> + >> +void >> +ac_add_function_attr(LLVMValueRef function, >> + int attr_idx, >> + enum ac_func_attr attr); >> +LLVMValueRef >> +ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, >> + LLVMTypeRef return_type, LLVMValueRef *params, >> + unsigned param_count, unsigned attrib_mask); >> + >> +LLVMValueRef >> +ac_build_gather_values_extended(struct ac_llvm_context *ctx, >> + LLVMValueRef *values, >> + unsigned value_count, >> + unsigned value_stride, >> + bool load); >> +LLVMValueRef >> +ac_build_gather_values(struct ac_llvm_context *ctx, >> + LLVMValueRef *values, >> + unsigned value_count); >> + >> +LLVMValueRef >> +ac_emit_fdiv(struct ac_llvm_context *ctx, >> + LLVMValueRef num, >> + LLVMValueRef den); >> + >> +void >> +ac_prepare_cube_coords(struct ac_llvm_context *ctx, >> + bool is_deriv, bool is_array, >> + LLVMValueRef *coords_arg, >> + LLVMValueRef *derivs_arg); >> + >> #ifdef __cplusplus >> } >> #endif >> diff --git a/src/amd/common/ac_nir_to_llvm.c >> b/src/amd/common/ac_nir_to_llvm.c >> index f214fcd..0fd8559 100644 >> --- a/src/amd/common/ac_nir_to_llvm.c >> +++ b/src/amd/common/ac_nir_to_llvm.c >> @@ -44,20 +44,21 @@ enum radeon_llvm_calling_convention { >> #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) >> >> enum desc_type { >> DESC_IMAGE, >> DESC_FMASK, >> DESC_SAMPLER, >> DESC_BUFFER, >> }; >> >> struct nir_to_llvm_context { >> + struct ac_llvm_context ac; >> const struct ac_nir_compiler_options *options; >> struct ac_shader_variant_info *shader_info; >> >> LLVMContextRef context; >> LLVMModuleRef module; >> LLVMBuilderRef builder; >> LLVMValueRef main_function; >> >> struct hash_table *defs; >> struct hash_table *phis; >> @@ -134,91 +135,20 @@ struct nir_to_llvm_context { >> bool has_ds_bpermute; >> }; >> >> struct ac_tex_info { >> LLVMValueRef args[12]; >> int arg_count; >> LLVMTypeRef dst_type; >> bool has_offset; >> }; >> >> 
-enum ac_func_attr { >> - AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), >> - AC_FUNC_ATTR_BYVAL = (1 << 1), >> - AC_FUNC_ATTR_INREG = (1 << 2), >> - AC_FUNC_ATTR_NOALIAS = (1 << 3), >> - AC_FUNC_ATTR_NOUNWIND = (1 << 4), >> - AC_FUNC_ATTR_READNONE = (1 << 5), >> - AC_FUNC_ATTR_READONLY = (1 << 6), >> - AC_FUNC_ATTR_LAST = (1 << 7) >> -}; >> - >> -#if HAVE_LLVM < 0x0400 >> -static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) >> -{ >> - switch (attr) { >> - case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; >> - case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute; >> - case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute; >> - case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute; >> - case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; >> - case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; >> - case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; >> - default: >> - fprintf(stderr, "Unhandled function attribute: %x\n", attr); >> - return 0; >> - } >> -} >> - >> -#else >> - >> -static const char *attr_to_str(enum ac_func_attr attr) >> -{ >> - switch (attr) { >> - case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; >> - case AC_FUNC_ATTR_BYVAL: return "byval"; >> - case AC_FUNC_ATTR_INREG: return "inreg"; >> - case AC_FUNC_ATTR_NOALIAS: return "noalias"; >> - case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; >> - case AC_FUNC_ATTR_READNONE: return "readnone"; >> - case AC_FUNC_ATTR_READONLY: return "readonly"; >> - default: >> - fprintf(stderr, "Unhandled function attribute: %x\n", attr); >> - return 0; >> - } >> -} >> - >> -#endif >> - >> -static void >> -ac_add_function_attr(LLVMValueRef function, >> - int attr_idx, >> - enum ac_func_attr attr) >> -{ >> - >> -#if HAVE_LLVM < 0x0400 >> - LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); >> - if (attr_idx == -1) { >> - LLVMAddFunctionAttr(function, llvm_attr); >> - } else { >> - LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); >> - } >> -#else >> - 
LLVMContextRef context = >> LLVMGetModuleContext(LLVMGetGlobalParent(function)); >> - const char *attr_name = attr_to_str(attr); >> - unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, >> - strlen(attr_name)); >> - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, >> 0); >> - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); >> -#endif >> -} >> - >> static LLVMValueRef >> emit_llvm_intrinsic(struct nir_to_llvm_context *ctx, const char *name, >> LLVMTypeRef return_type, LLVMValueRef *params, >> unsigned param_count, unsigned attr_mask); >> static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, >> nir_deref_var *deref, >> enum desc_type desc_type); >> static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) >> { >> return (index * 4) + chan; >> @@ -3297,144 +3227,20 @@ static void tex_fetch_ptrs(struct >> nir_to_llvm_context *ctx, >> else >> *samp_ptr = get_sampler_desc(ctx, instr->texture, >> DESC_SAMPLER); >> if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) >> *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, >> *samp_ptr); >> } >> if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || >> instr->op == >> nir_texop_samples_identical)) >> *fmask_ptr = get_sampler_desc(ctx, instr->texture, >> DESC_FMASK); >> } >> >> -static LLVMValueRef build_cube_intrinsic(struct nir_to_llvm_context *ctx, >> - LLVMValueRef *in) >> -{ >> - >> - LLVMValueRef v, cube_vec; >> - >> - if (1) { >> - LLVMTypeRef f32 = LLVMTypeOf(in[0]); >> - LLVMValueRef out[4]; >> - >> - out[0] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc", >> - f32, in, 3, >> AC_FUNC_ATTR_READNONE); >> - out[1] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc", >> - f32, in, 3, >> AC_FUNC_ATTR_READNONE); >> - out[2] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema", >> - f32, in, 3, >> AC_FUNC_ATTR_READNONE); >> - out[3] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid", >> - f32, in, 3, >> AC_FUNC_ATTR_READNONE); >> - >> - return 
build_gather_values(ctx, out, 4); >> - } else { >> - LLVMValueRef c[4]; >> - c[0] = in[0]; >> - c[1] = in[1]; >> - c[2] = in[2]; >> - c[3] = LLVMGetUndef(LLVMTypeOf(in[0])); >> - cube_vec = build_gather_values(ctx, c, 4); >> - v = emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", >> LLVMTypeOf(cube_vec), >> - &cube_vec, 1, AC_FUNC_ATTR_READNONE); >> - } >> - return v; >> -} >> - >> -static void cube_to_2d_coords(struct nir_to_llvm_context *ctx, >> - LLVMValueRef *in, LLVMValueRef *out) >> -{ >> - LLVMValueRef coords[4]; >> - LLVMValueRef mad_args[3]; >> - LLVMValueRef v; >> - LLVMValueRef tmp; >> - int i; >> - >> - v = build_cube_intrinsic(ctx, in); >> - for (i = 0; i < 4; i++) >> - coords[i] = LLVMBuildExtractElement(ctx->builder, v, >> - LLVMConstInt(ctx->i32, >> i, false), ""); >> - >> - coords[2] = emit_llvm_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, >> - &coords[2], 1, >> AC_FUNC_ATTR_READNONE); >> - coords[2] = emit_fdiv(ctx, ctx->f32one, coords[2]); >> - >> - mad_args[1] = coords[2]; >> - mad_args[2] = LLVMConstReal(ctx->f32, 1.5); >> - mad_args[0] = coords[0]; >> - >> - /* emit MAD */ >> - tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], ""); >> - coords[0] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], ""); >> - >> - mad_args[0] = coords[1]; >> - >> - /* emit MAD */ >> - tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], ""); >> - coords[1] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], ""); >> - >> - /* apply xyz = yxw swizzle to cooords */ >> - out[0] = coords[1]; >> - out[1] = coords[0]; >> - out[2] = coords[3]; >> -} >> - >> -static void emit_prepare_cube_coords(struct nir_to_llvm_context *ctx, >> - LLVMValueRef *coords_arg, int >> num_coords, >> - bool is_deriv, >> - bool is_array, LLVMValueRef *derivs_arg) >> -{ >> - LLVMValueRef coords[4]; >> - int i; >> - cube_to_2d_coords(ctx, coords_arg, coords); >> - >> - if (is_deriv && derivs_arg) { >> - LLVMValueRef derivs[4]; >> - int axis; >> - >> - /* Convert cube derivatives to 2D 
derivatives. */ >> - for (axis = 0; axis < 2; axis++) { >> - LLVMValueRef shifted_cube_coords[4], >> shifted_coords[4]; >> - >> - /* Shift the cube coordinates by the derivatives to >> get >> - * the cube coordinates of the "neighboring pixel". >> - */ >> - for (i = 0; i < 3; i++) >> - shifted_cube_coords[i] = >> - LLVMBuildFAdd(ctx->builder, >> coords_arg[i], >> - derivs_arg[axis*3+i], >> ""); >> - shifted_cube_coords[3] = LLVMGetUndef(ctx->f32); >> - >> - /* Project the shifted cube coordinates onto the >> face. */ >> - cube_to_2d_coords(ctx, shifted_cube_coords, >> - shifted_coords); >> - >> - /* Subtract both sets of 2D coordinates to get 2D >> derivatives. >> - * This won't work if the shifted coordinates ended >> up >> - * in a different face. >> - */ >> - for (i = 0; i < 2; i++) >> - derivs[axis * 2 + i] = >> - LLVMBuildFSub(ctx->builder, >> shifted_coords[i], >> - coords[i], ""); >> - } >> - >> - memcpy(derivs_arg, derivs, sizeof(derivs)); >> - } >> - >> - if (is_array) { >> - /* for cube arrays coord.z = coord.w(array_index) * 8 + face >> */ >> - /* coords_arg.w component - array_index for cube arrays */ >> - LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, >> coords_arg[3], LLVMConstReal(ctx->f32, 8.0), ""); >> - coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); >> - } >> - >> - memcpy(coords_arg, coords, sizeof(coords)); >> -} >> - >> static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) >> { >> LLVMValueRef result = NULL; >> struct ac_tex_info tinfo = { 0 }; >> unsigned dmask = 0xf; >> LLVMValueRef address[16]; >> LLVMValueRef coords[5]; >> LLVMValueRef coord = NULL, lod = NULL, comparator = NULL; >> LLVMValueRef bias = NULL, offsets = NULL; >> LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = >> NULL; >> @@ -3562,21 +3368,23 @@ static void visit_tex(struct nir_to_llvm_context >> *ctx, nir_tex_instr *instr) >> derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, >> ddx, i)); >> derivs[i * 2 + 1] = 
to_float(ctx, >> llvm_extract_elem(ctx, ddy, i)); >> } >> } >> >> if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { >> for (chan = 0; chan < instr->coord_components; chan++) >> coords[chan] = to_float(ctx, coords[chan]); >> if (instr->coord_components == 3) >> coords[3] = LLVMGetUndef(ctx->f32); >> - emit_prepare_cube_coords(ctx, coords, >> instr->coord_components, instr->op == nir_texop_txd, instr->is_array, >> derivs); >> + ac_prepare_cube_coords(&ctx->ac, >> + instr->op == nir_texop_txd, instr->is_array, >> + coords, derivs); >> if (num_deriv_comp) >> num_deriv_comp--; >> } >> >> if (ddx || ddy) { >> for (unsigned i = 0; i < num_deriv_comp * 2; i++) >> address[count++] = derivs[i]; >> } >> >> /* Pack texture coordinates */ >> @@ -4672,28 +4480,32 @@ LLVMModuleRef >> ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, >> const struct ac_nir_compiler_options >> *options) >> { >> struct nir_to_llvm_context ctx = {0}; >> struct nir_function *func; >> unsigned i; >> ctx.options = options; >> ctx.shader_info = shader_info; >> ctx.context = LLVMContextCreate(); >> ctx.module = LLVMModuleCreateWithNameInContext("shader", >> ctx.context); >> >> + ac_llvm_context_init(&ctx.ac, ctx.context); >> + ctx.ac.module = ctx.module; >> + >> ctx.has_ds_bpermute = ctx.options->chip_class >= VI; >> >> memset(shader_info, 0, sizeof(*shader_info)); >> >> LLVMSetTarget(ctx.module, "amdgcn--"); >> setup_types(&ctx); >> >> ctx.builder = LLVMCreateBuilderInContext(ctx.context); >> + ctx.ac.builder = ctx.builder; >> ctx.stage = nir->stage; >> >> for (i = 0; i < AC_UD_MAX_SETS; i++) >> shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = >> -1; >> for (i = 0; i < AC_UD_MAX_UD; i++) >> shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; >> >> create_function(&ctx); >> >> if (nir->stage == MESA_SHADER_COMPUTE) { >> diff --git a/src/gallium/drivers/radeonsi/si_shader.c >> b/src/gallium/drivers/radeonsi/si_shader.c >> index 8dec55c..f760dd2 100644 >> --- 
a/src/gallium/drivers/radeonsi/si_shader.c >> +++ b/src/gallium/drivers/radeonsi/si_shader.c >> @@ -4599,21 +4599,25 @@ static void tex_fetch_args( >> for (param = 0; param < 2; param++) >> for (chan = 0; chan < num_src_deriv_channels; chan++) >> derivs[param * num_src_deriv_channels + >> chan] = >> lp_build_emit_fetch(bld_base, inst, >> param+1, chan); >> } >> >> if (target == TGSI_TEXTURE_CUBE || >> target == TGSI_TEXTURE_CUBE_ARRAY || >> target == TGSI_TEXTURE_SHADOWCUBE || >> target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) >> - si_prepare_cube_coords(bld_base, emit_data, coords, derivs); >> + ac_prepare_cube_coords(&ctx->ac, >> + opcode == TGSI_OPCODE_TXD, >> + target == TGSI_TEXTURE_CUBE_ARRAY || >> + target == >> TGSI_TEXTURE_SHADOWCUBE_ARRAY, >> + coords, derivs); >> >> if (opcode == TGSI_OPCODE_TXD) >> for (int i = 0; i < num_deriv_channels * 2; i++) >> address[count++] = derivs[i]; >> >> /* Pack texture coordinates */ >> address[count++] = coords[0]; >> if (num_coords > 1) >> address[count++] = coords[1]; >> if (num_coords > 2) >> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h >> b/src/gallium/drivers/radeonsi/si_shader_internal.h >> index 8d6a40b..6b3ac17 100644 >> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h >> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h >> @@ -21,40 +21,42 @@ >> * USE OR OTHER DEALINGS IN THE SOFTWARE. 
>> */ >> >> #ifndef SI_SHADER_PRIVATE_H >> #define SI_SHADER_PRIVATE_H >> >> #include "si_shader.h" >> #include "gallivm/lp_bld_init.h" >> #include "gallivm/lp_bld_tgsi.h" >> #include "tgsi/tgsi_parse.h" >> +#include "ac_llvm_util.h" >> >> #include <llvm-c/Core.h> >> #include <llvm-c/TargetMachine.h> >> >> struct pipe_debug_callback; >> struct radeon_shader_binary; >> >> #define RADEON_LLVM_MAX_INPUT_SLOTS 32 >> #define RADEON_LLVM_MAX_INPUTS 32 * 4 >> #define RADEON_LLVM_MAX_OUTPUTS 32 * 4 >> >> #define RADEON_LLVM_INITIAL_CF_DEPTH 4 >> >> #define RADEON_LLVM_MAX_SYSTEM_VALUES 4 >> >> struct si_llvm_flow; >> >> struct si_shader_context { >> struct lp_build_tgsi_soa_context soa; >> struct gallivm_state gallivm; >> + struct ac_llvm_context ac; >> struct si_shader *shader; >> struct si_screen *screen; >> >> unsigned type; /* PIPE_SHADER_* specifies the type of shader. */ >> >> /* Whether the prolog will be compiled separately. */ >> bool separate_prolog; >> >> /** This function is responsible for initilizing the inputs array >> and will be >> * called once for each input declared in the TGSI shader. 
>> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c >> b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c >> index 2f38949..4a919dd 100644 >> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c >> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c >> @@ -1245,20 +1245,24 @@ void si_llvm_context_init(struct si_shader_context >> *ctx, >> >> ctx->gallivm.context = LLVMContextCreate(); >> ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi", >> ctx->gallivm.context); >> LLVMSetTarget(ctx->gallivm.module, "amdgcn--"); >> >> bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0; >> ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context, >> unsafe_fpmath); >> >> + ac_llvm_context_init(&ctx->ac, ctx->gallivm.context); >> + ctx->ac.module = ctx->gallivm.module; >> + ctx->ac.builder = ctx->gallivm.builder; >> + >> struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; >> >> bld_base->info = info; >> >> if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) { >> int size = info->array_max[TGSI_FILE_TEMPORARY]; >> >> ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0])); >> ctx->temp_array_allocas = CALLOC(size, >> sizeof(ctx->temp_array_allocas[0])); >> >> -- >> 2.7.4 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev