Unfortunately this one breaks at least (surprise!) the texturecubemap demo from Sascha Willems' Vulkan examples. I recommend you try it yourself; there are even precompiled binaries available (see README.md): https://github.com/SaschaWillems/Vulkan
Gražvydas On Tue, Jan 10, 2017 at 5:12 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > Code is taken from a combination of radv (for the more basic functions, > to avoid gallivm dependencies) and radeonsi (for the new and improved > derivative calculations). > --- > src/amd/common/ac_llvm_util.c | 362 > +++++++++++++++++++++ > src/amd/common/ac_llvm_util.h | 57 ++++ > src/amd/common/ac_nir_to_llvm.c | 204 +----------- > src/gallium/drivers/radeonsi/si_shader.c | 6 +- > src/gallium/drivers/radeonsi/si_shader_internal.h | 2 + > .../drivers/radeonsi/si_shader_tgsi_setup.c | 4 + > 6 files changed, 438 insertions(+), 197 deletions(-) > > diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c > index a8408dd..6dd6cfa 100644 > --- a/src/amd/common/ac_llvm_util.c > +++ b/src/amd/common/ac_llvm_util.c > @@ -25,20 +25,23 @@ > /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ > #include "ac_llvm_util.h" > > #include <llvm-c/Core.h> > > #include "c11/threads.h" > > #include <assert.h> > #include <stdio.h> > > +#include "util/bitscan.h" > +#include "util/macros.h" > + > static void ac_init_llvm_target() > { > #if HAVE_LLVM < 0x0307 > LLVMInitializeR600TargetInfo(); > LLVMInitializeR600Target(); > LLVMInitializeR600TargetMC(); > LLVMInitializeR600AsmPrinter(); > #else > LLVMInitializeAMDGPUTargetInfo(); > LLVMInitializeAMDGPUTarget(); > @@ -133,10 +136,369 @@ LLVMTargetMachineRef ac_create_target_machine(enum > radeon_family family) > target, > triple, > ac_get_llvm_processor_name(family), > "+DumpCode,+vgpr-spilling", > LLVMCodeGenLevelDefault, > LLVMRelocDefault, > LLVMCodeModelDefault); > > return tm; > } > + > +/* Initialize module-independent parts of the context. > + * > + * The caller is responsible for initializing ctx::module and ctx::builder. 
> + */ > +void > +ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context) > +{ > + LLVMValueRef args[1]; > + > + ctx->context = context; > + ctx->module = NULL; > + ctx->builder = NULL; > + > + ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); > + ctx->f32 = LLVMFloatTypeInContext(ctx->context); > + > + ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, > "fpmath", 6); > + > + args[0] = LLVMConstReal(ctx->f32, 2.5); > + ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1); > +} > + > +#if HAVE_LLVM < 0x0400 > +static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) > +{ > + switch (attr) { > + case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; > + case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute; > + case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute; > + case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute; > + case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; > + case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; > + case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; > + default: > + fprintf(stderr, "Unhandled function attribute: %x\n", attr); > + return 0; > + } > +} > + > +#else > + > +static const char *attr_to_str(enum ac_func_attr attr) > +{ > + switch (attr) { > + case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; > + case AC_FUNC_ATTR_BYVAL: return "byval"; > + case AC_FUNC_ATTR_INREG: return "inreg"; > + case AC_FUNC_ATTR_NOALIAS: return "noalias"; > + case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; > + case AC_FUNC_ATTR_READNONE: return "readnone"; > + case AC_FUNC_ATTR_READONLY: return "readonly"; > + default: > + fprintf(stderr, "Unhandled function attribute: %x\n", attr); > + return 0; > + } > +} > + > +#endif > + > +void > +ac_add_function_attr(LLVMValueRef function, > + int attr_idx, > + enum ac_func_attr attr) > +{ > + > +#if HAVE_LLVM < 0x0400 > + LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); > + if (attr_idx == -1) { > + 
LLVMAddFunctionAttr(function, llvm_attr); > + } else { > + LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); > + } > +#else > + LLVMContextRef context = > LLVMGetModuleContext(LLVMGetGlobalParent(function)); > + const char *attr_name = attr_to_str(attr); > + unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, > + strlen(attr_name)); > + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0); > + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); > +#endif > +} > + > +LLVMValueRef > +ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, > + LLVMTypeRef return_type, LLVMValueRef *params, > + unsigned param_count, unsigned attrib_mask) > +{ > + LLVMValueRef function; > + > + function = LLVMGetNamedFunction(ctx->module, name); > + if (!function) { > + LLVMTypeRef param_types[32], function_type; > + unsigned i; > + > + assert(param_count <= 32); > + > + for (i = 0; i < param_count; ++i) { > + assert(params[i]); > + param_types[i] = LLVMTypeOf(params[i]); > + } > + function_type = > + LLVMFunctionType(return_type, param_types, param_count, > 0); > + function = LLVMAddFunction(ctx->module, name, function_type); > + > + LLVMSetFunctionCallConv(function, LLVMCCallConv); > + LLVMSetLinkage(function, LLVMExternalLinkage); > + > + attrib_mask |= AC_FUNC_ATTR_NOUNWIND; > + while (attrib_mask) { > + enum ac_func_attr attr = 1u << > u_bit_scan(&attrib_mask); > + ac_add_function_attr(function, -1, attr); > + } > + } > + return LLVMBuildCall(ctx->builder, function, params, param_count, ""); > +} > + > +LLVMValueRef > +ac_build_gather_values_extended(struct ac_llvm_context *ctx, > + LLVMValueRef *values, > + unsigned value_count, > + unsigned value_stride, > + bool load) > +{ > + LLVMBuilderRef builder = ctx->builder; > + LLVMValueRef vec; > + unsigned i; > + > + > + if (value_count == 1) { > + if (load) > + return LLVMBuildLoad(builder, values[0], ""); > + return values[0]; > + } else if (!value_count) > + 
unreachable("value_count is 0"); > + > + for (i = 0; i < value_count; i++) { > + LLVMValueRef value = values[i * value_stride]; > + if (load) > + value = LLVMBuildLoad(builder, value, ""); > + > + if (!i) > + vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), > value_count)); > + LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); > + vec = LLVMBuildInsertElement(builder, vec, value, index, ""); > + } > + return vec; > +} > + > +LLVMValueRef > +ac_build_gather_values(struct ac_llvm_context *ctx, > + LLVMValueRef *values, > + unsigned value_count) > +{ > + return ac_build_gather_values_extended(ctx, values, value_count, 1, > false); > +} > + > +LLVMValueRef > +ac_emit_fdiv(struct ac_llvm_context *ctx, > + LLVMValueRef num, > + LLVMValueRef den) > +{ > + LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, ""); > + > + if (!LLVMIsConstant(ret)) > + LLVMSetMetadata(ret, ctx->fpmath_md_kind, > ctx->fpmath_md_2p5_ulp); > + return ret; > +} > + > +/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 > + * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is > + * already multiplied by two. id is the cube face number. 
> + */ > +struct cube_selection_coords { > + LLVMValueRef stc[2]; > + LLVMValueRef ma; > + LLVMValueRef id; > +}; > + > +static void > +build_cube_intrinsic(struct ac_llvm_context *ctx, > + LLVMValueRef in[3], > + struct cube_selection_coords *out) > +{ > + LLVMBuilderRef builder = ctx->builder; > + > + if (HAVE_LLVM >= 0x0309) { > + LLVMTypeRef f32 = ctx->f32; > + > + out->stc[1] = ac_emit_llvm_intrinsic(ctx, > "llvm.amdgcn.cubetc", > + f32, in, 3, AC_FUNC_ATTR_READNONE); > + out->stc[0] = ac_emit_llvm_intrinsic(ctx, > "llvm.amdgcn.cubesc", > + f32, in, 3, AC_FUNC_ATTR_READNONE); > + out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema", > + f32, in, 3, AC_FUNC_ATTR_READNONE); > + out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid", > + f32, in, 3, AC_FUNC_ATTR_READNONE); > + } else { > + LLVMValueRef c[4] = { > + in[0], > + in[1], > + in[2], > + LLVMGetUndef(LLVMTypeOf(in[0])) > + }; > + LLVMValueRef vec = ac_build_gather_values(ctx, c, 4); > + > + LLVMValueRef tmp = > + ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", > + LLVMTypeOf(vec), &vec, 1, > + AC_FUNC_ATTR_READNONE); > + > + out->stc[1] = LLVMBuildExtractElement(builder, tmp, > + LLVMConstInt(ctx->i32, 0, 0), ""); > + out->stc[0] = LLVMBuildExtractElement(builder, tmp, > + LLVMConstInt(ctx->i32, 1, 0), ""); > + out->ma = LLVMBuildExtractElement(builder, tmp, > + LLVMConstInt(ctx->i32, 2, 0), ""); > + out->id = LLVMBuildExtractElement(builder, tmp, > + LLVMConstInt(ctx->i32, 3, 0), ""); > + } > +} > + > +/** > + * Build a manual selection sequence for cube face sc/tc coordinates and > + * major axis vector (multiplied by 2 for consistency) for the given > + * vec3 \p coords, for the face implied by \p selcoords. > + * > + * For the major axis, we always adjust the sign to be in the direction of > + * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards > + * the selcoords major axis. 
> + */ > +static void build_cube_select(LLVMBuilderRef builder, > + const struct cube_selection_coords *selcoords, > + const LLVMValueRef *coords, > + LLVMValueRef *out_st, > + LLVMValueRef *out_ma) > +{ > + LLVMTypeRef f32 = LLVMTypeOf(coords[0]); > + LLVMValueRef is_ma_positive; > + LLVMValueRef sgn_ma; > + LLVMValueRef is_ma_z, is_not_ma_z; > + LLVMValueRef is_ma_y; > + LLVMValueRef is_ma_x; > + LLVMValueRef sgn; > + LLVMValueRef tmp; > + > + is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, > + selcoords->ma, LLVMConstReal(f32, 0.0), ""); > + sgn_ma = LLVMBuildSelect(builder, is_ma_positive, > + LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); > + > + is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, > LLVMConstReal(f32, 4.0), ""); > + is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); > + is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, > + LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, > LLVMConstReal(f32, 2.0), ""), ""); > + is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, > is_ma_y, ""), ""); > + > + /* Select sc */ > + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], ""); > + sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), > + LLVMBuildSelect(builder, is_ma_x, sgn_ma, > + LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); > + out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); > + > + /* Select tc */ > + tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); > + sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, > sgn_ma, ""), > + LLVMConstReal(f32, -1.0), ""); > + out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); > + > + /* Select ma */ > + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], > + LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), > ""); > + sgn = LLVMBuildSelect(builder, is_ma_positive, > + LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), ""); > + *out_ma = LLVMBuildFMul(builder, tmp, sgn, ""); > +} > + > +void > +ac_prepare_cube_coords(struct 
ac_llvm_context *ctx, > + bool is_deriv, bool is_array, > + LLVMValueRef *coords_arg, > + LLVMValueRef *derivs_arg) > +{ > + > + LLVMBuilderRef builder = ctx->builder; > + struct cube_selection_coords selcoords; > + LLVMValueRef coords[4]; > + LLVMValueRef invma; > + > + build_cube_intrinsic(ctx, coords_arg, &selcoords); > + > + invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32", > + ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); > + invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); > + > + for (int i = 0; i < 2; ++i) { > + coords[i] = > + LLVMBuildFAdd(builder, > + LLVMBuildFMul(builder, selcoords.stc[i], > invma, ""), > + LLVMConstReal(ctx->f32, 0.5), ""); > + } > + > + coords[2] = selcoords.id; > + > + if (is_deriv && derivs_arg) { > + LLVMValueRef derivs[4]; > + int axis; > + > + /* Convert cube derivatives to 2D derivatives. */ > + for (axis = 0; axis < 2; axis++) { > + LLVMValueRef deriv_st[2]; > + LLVMValueRef deriv_ma; > + > + /* Transform the derivative alongside the texture > + * coordinate. Mathematically, the correct formula is > + * as follows. Assume we're projecting onto the +Z > face > + * and denote by dx/dh the derivative of the > (original) > + * X texture coordinate with respect to horizontal > + * window coordinates. The projection onto the +Z face > + * plane is: > + * > + * f(x,z) = x/z > + * > + * Then df/dh = df/dx * dx/dh + df/dz * dz/dh > + * = 1/z * dx/dh - x/z * 1/z * dz/dh. > + * > + * This motivatives the implementation below. > + * > + * Whether this actually gives the expected results > for > + * apps that might feed in derivatives obtained via > + * finite differences is anyone's guess. The OpenGL > spec > + * seems awfully quiet about how textureGrad for cube > + * maps should be handled. 
> + */ > + build_cube_select(builder, &selcoords, > &derivs_arg[axis * 3], > + deriv_st, &deriv_ma); > + > + deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, > ""); > + > + for (int i = 0; i < 2; ++i) > + derivs[axis * 2 + i] = > + LLVMBuildFSub(builder, > + LLVMBuildFMul(builder, > deriv_st[i], invma, ""), > + LLVMBuildFMul(builder, > deriv_ma, coords[i], ""), ""); > + } > + > + memcpy(derivs_arg, derivs, sizeof(derivs)); > + } > + > + if (is_array) { > + /* for cube arrays coord.z = coord.w(array_index) * 8 + face > */ > + /* coords_arg.w component - array_index for cube arrays */ > + LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], > LLVMConstReal(ctx->f32, 8.0), ""); > + coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); > + } > + > + memcpy(coords_arg, coords, sizeof(coords)); > +} > diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h > index d9ea9bd..802c266 100644 > --- a/src/amd/common/ac_llvm_util.h > +++ b/src/amd/common/ac_llvm_util.h > @@ -26,18 +26,75 @@ > > #include <stdbool.h> > #include <llvm-c/TargetMachine.h> > > #include "amd_family.h" > > #ifdef __cplusplus > extern "C" { > #endif > > +enum ac_func_attr { > + AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), > + AC_FUNC_ATTR_BYVAL = (1 << 1), > + AC_FUNC_ATTR_INREG = (1 << 2), > + AC_FUNC_ATTR_NOALIAS = (1 << 3), > + AC_FUNC_ATTR_NOUNWIND = (1 << 4), > + AC_FUNC_ATTR_READNONE = (1 << 5), > + AC_FUNC_ATTR_READONLY = (1 << 6), > + AC_FUNC_ATTR_LAST = (1 << 7) > +}; > + > +struct ac_llvm_context { > + LLVMContextRef context; > + LLVMModuleRef module; > + LLVMBuilderRef builder; > + > + LLVMTypeRef i32; > + LLVMTypeRef f32; > + > + unsigned fpmath_md_kind; > + LLVMValueRef fpmath_md_2p5_ulp; > +}; > + > LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family); > > void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); > bool ac_is_sgpr_param(LLVMValueRef param); > > +void > +ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context); > + > +void > +ac_add_function_attr(LLVMValueRef function, > + int attr_idx, > + enum ac_func_attr attr); > +LLVMValueRef > +ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, > + LLVMTypeRef return_type, LLVMValueRef *params, > + unsigned param_count, unsigned attrib_mask); > + > +LLVMValueRef > +ac_build_gather_values_extended(struct ac_llvm_context *ctx, > + LLVMValueRef *values, > + unsigned value_count, > + unsigned value_stride, > + bool load); > +LLVMValueRef > +ac_build_gather_values(struct ac_llvm_context *ctx, > + LLVMValueRef *values, > + unsigned value_count); > + > +LLVMValueRef > +ac_emit_fdiv(struct ac_llvm_context *ctx, > + LLVMValueRef num, > + LLVMValueRef den); > + > +void > +ac_prepare_cube_coords(struct ac_llvm_context *ctx, > + bool is_deriv, bool is_array, > + LLVMValueRef *coords_arg, > + LLVMValueRef *derivs_arg); > + > #ifdef __cplusplus > } > #endif > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index f214fcd..0fd8559 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -44,20 +44,21 @@ enum radeon_llvm_calling_convention { > #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) > > enum desc_type { > DESC_IMAGE, > DESC_FMASK, > DESC_SAMPLER, > DESC_BUFFER, > }; > > struct nir_to_llvm_context { > + struct ac_llvm_context ac; > const struct ac_nir_compiler_options *options; > struct ac_shader_variant_info *shader_info; > > LLVMContextRef context; > LLVMModuleRef module; > LLVMBuilderRef builder; > LLVMValueRef main_function; > > struct hash_table *defs; > struct hash_table *phis; > @@ -134,91 +135,20 @@ struct nir_to_llvm_context { > bool has_ds_bpermute; > }; > > struct ac_tex_info { > LLVMValueRef args[12]; > int arg_count; > LLVMTypeRef dst_type; > bool has_offset; > }; > > -enum ac_func_attr { > - AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0), > - AC_FUNC_ATTR_BYVAL = (1 << 1), > - AC_FUNC_ATTR_INREG = (1 << 2), > - 
AC_FUNC_ATTR_NOALIAS = (1 << 3), > - AC_FUNC_ATTR_NOUNWIND = (1 << 4), > - AC_FUNC_ATTR_READNONE = (1 << 5), > - AC_FUNC_ATTR_READONLY = (1 << 6), > - AC_FUNC_ATTR_LAST = (1 << 7) > -}; > - > -#if HAVE_LLVM < 0x0400 > -static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) > -{ > - switch (attr) { > - case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; > - case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute; > - case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute; > - case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute; > - case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; > - case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; > - case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; > - default: > - fprintf(stderr, "Unhandled function attribute: %x\n", attr); > - return 0; > - } > -} > - > -#else > - > -static const char *attr_to_str(enum ac_func_attr attr) > -{ > - switch (attr) { > - case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; > - case AC_FUNC_ATTR_BYVAL: return "byval"; > - case AC_FUNC_ATTR_INREG: return "inreg"; > - case AC_FUNC_ATTR_NOALIAS: return "noalias"; > - case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; > - case AC_FUNC_ATTR_READNONE: return "readnone"; > - case AC_FUNC_ATTR_READONLY: return "readonly"; > - default: > - fprintf(stderr, "Unhandled function attribute: %x\n", attr); > - return 0; > - } > -} > - > -#endif > - > -static void > -ac_add_function_attr(LLVMValueRef function, > - int attr_idx, > - enum ac_func_attr attr) > -{ > - > -#if HAVE_LLVM < 0x0400 > - LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); > - if (attr_idx == -1) { > - LLVMAddFunctionAttr(function, llvm_attr); > - } else { > - LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); > - } > -#else > - LLVMContextRef context = > LLVMGetModuleContext(LLVMGetGlobalParent(function)); > - const char *attr_name = attr_to_str(attr); > - unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, > - 
strlen(attr_name)); > - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0); > - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); > -#endif > -} > - > static LLVMValueRef > emit_llvm_intrinsic(struct nir_to_llvm_context *ctx, const char *name, > LLVMTypeRef return_type, LLVMValueRef *params, > unsigned param_count, unsigned attr_mask); > static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, > nir_deref_var *deref, > enum desc_type desc_type); > static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) > { > return (index * 4) + chan; > @@ -3297,144 +3227,20 @@ static void tex_fetch_ptrs(struct > nir_to_llvm_context *ctx, > else > *samp_ptr = get_sampler_desc(ctx, instr->texture, > DESC_SAMPLER); > if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) > *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, > *samp_ptr); > } > if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || > instr->op == > nir_texop_samples_identical)) > *fmask_ptr = get_sampler_desc(ctx, instr->texture, > DESC_FMASK); > } > > -static LLVMValueRef build_cube_intrinsic(struct nir_to_llvm_context *ctx, > - LLVMValueRef *in) > -{ > - > - LLVMValueRef v, cube_vec; > - > - if (1) { > - LLVMTypeRef f32 = LLVMTypeOf(in[0]); > - LLVMValueRef out[4]; > - > - out[0] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc", > - f32, in, 3, > AC_FUNC_ATTR_READNONE); > - out[1] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc", > - f32, in, 3, > AC_FUNC_ATTR_READNONE); > - out[2] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema", > - f32, in, 3, > AC_FUNC_ATTR_READNONE); > - out[3] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid", > - f32, in, 3, > AC_FUNC_ATTR_READNONE); > - > - return build_gather_values(ctx, out, 4); > - } else { > - LLVMValueRef c[4]; > - c[0] = in[0]; > - c[1] = in[1]; > - c[2] = in[2]; > - c[3] = LLVMGetUndef(LLVMTypeOf(in[0])); > - cube_vec = build_gather_values(ctx, c, 4); > - v = emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", > 
LLVMTypeOf(cube_vec), > - &cube_vec, 1, AC_FUNC_ATTR_READNONE); > - } > - return v; > -} > - > -static void cube_to_2d_coords(struct nir_to_llvm_context *ctx, > - LLVMValueRef *in, LLVMValueRef *out) > -{ > - LLVMValueRef coords[4]; > - LLVMValueRef mad_args[3]; > - LLVMValueRef v; > - LLVMValueRef tmp; > - int i; > - > - v = build_cube_intrinsic(ctx, in); > - for (i = 0; i < 4; i++) > - coords[i] = LLVMBuildExtractElement(ctx->builder, v, > - LLVMConstInt(ctx->i32, i, > false), ""); > - > - coords[2] = emit_llvm_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, > - &coords[2], 1, AC_FUNC_ATTR_READNONE); > - coords[2] = emit_fdiv(ctx, ctx->f32one, coords[2]); > - > - mad_args[1] = coords[2]; > - mad_args[2] = LLVMConstReal(ctx->f32, 1.5); > - mad_args[0] = coords[0]; > - > - /* emit MAD */ > - tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], ""); > - coords[0] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], ""); > - > - mad_args[0] = coords[1]; > - > - /* emit MAD */ > - tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], ""); > - coords[1] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], ""); > - > - /* apply xyz = yxw swizzle to cooords */ > - out[0] = coords[1]; > - out[1] = coords[0]; > - out[2] = coords[3]; > -} > - > -static void emit_prepare_cube_coords(struct nir_to_llvm_context *ctx, > - LLVMValueRef *coords_arg, int num_coords, > - bool is_deriv, > - bool is_array, LLVMValueRef *derivs_arg) > -{ > - LLVMValueRef coords[4]; > - int i; > - cube_to_2d_coords(ctx, coords_arg, coords); > - > - if (is_deriv && derivs_arg) { > - LLVMValueRef derivs[4]; > - int axis; > - > - /* Convert cube derivatives to 2D derivatives. */ > - for (axis = 0; axis < 2; axis++) { > - LLVMValueRef shifted_cube_coords[4], > shifted_coords[4]; > - > - /* Shift the cube coordinates by the derivatives to > get > - * the cube coordinates of the "neighboring pixel". 
> - */ > - for (i = 0; i < 3; i++) > - shifted_cube_coords[i] = > - LLVMBuildFAdd(ctx->builder, > coords_arg[i], > - derivs_arg[axis*3+i], > ""); > - shifted_cube_coords[3] = LLVMGetUndef(ctx->f32); > - > - /* Project the shifted cube coordinates onto the > face. */ > - cube_to_2d_coords(ctx, shifted_cube_coords, > - shifted_coords); > - > - /* Subtract both sets of 2D coordinates to get 2D > derivatives. > - * This won't work if the shifted coordinates ended up > - * in a different face. > - */ > - for (i = 0; i < 2; i++) > - derivs[axis * 2 + i] = > - LLVMBuildFSub(ctx->builder, > shifted_coords[i], > - coords[i], ""); > - } > - > - memcpy(derivs_arg, derivs, sizeof(derivs)); > - } > - > - if (is_array) { > - /* for cube arrays coord.z = coord.w(array_index) * 8 + face > */ > - /* coords_arg.w component - array_index for cube arrays */ > - LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], > LLVMConstReal(ctx->f32, 8.0), ""); > - coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); > - } > - > - memcpy(coords_arg, coords, sizeof(coords)); > -} > - > static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) > { > LLVMValueRef result = NULL; > struct ac_tex_info tinfo = { 0 }; > unsigned dmask = 0xf; > LLVMValueRef address[16]; > LLVMValueRef coords[5]; > LLVMValueRef coord = NULL, lod = NULL, comparator = NULL; > LLVMValueRef bias = NULL, offsets = NULL; > LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL; > @@ -3562,21 +3368,23 @@ static void visit_tex(struct nir_to_llvm_context > *ctx, nir_tex_instr *instr) > derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, > ddx, i)); > derivs[i * 2 + 1] = to_float(ctx, > llvm_extract_elem(ctx, ddy, i)); > } > } > > if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { > for (chan = 0; chan < instr->coord_components; chan++) > coords[chan] = to_float(ctx, coords[chan]); > if (instr->coord_components == 3) > coords[3] = LLVMGetUndef(ctx->f32); > - 
emit_prepare_cube_coords(ctx, coords, > instr->coord_components, instr->op == nir_texop_txd, instr->is_array, derivs); > + ac_prepare_cube_coords(&ctx->ac, > + instr->op == nir_texop_txd, instr->is_array, > + coords, derivs); > if (num_deriv_comp) > num_deriv_comp--; > } > > if (ddx || ddy) { > for (unsigned i = 0; i < num_deriv_comp * 2; i++) > address[count++] = derivs[i]; > } > > /* Pack texture coordinates */ > @@ -4672,28 +4480,32 @@ LLVMModuleRef > ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, > const struct ac_nir_compiler_options > *options) > { > struct nir_to_llvm_context ctx = {0}; > struct nir_function *func; > unsigned i; > ctx.options = options; > ctx.shader_info = shader_info; > ctx.context = LLVMContextCreate(); > ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); > > + ac_llvm_context_init(&ctx.ac, ctx.context); > + ctx.ac.module = ctx.module; > + > ctx.has_ds_bpermute = ctx.options->chip_class >= VI; > > memset(shader_info, 0, sizeof(*shader_info)); > > LLVMSetTarget(ctx.module, "amdgcn--"); > setup_types(&ctx); > > ctx.builder = LLVMCreateBuilderInContext(ctx.context); > + ctx.ac.builder = ctx.builder; > ctx.stage = nir->stage; > > for (i = 0; i < AC_UD_MAX_SETS; i++) > shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; > for (i = 0; i < AC_UD_MAX_UD; i++) > shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; > > create_function(&ctx); > > if (nir->stage == MESA_SHADER_COMPUTE) { > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 8dec55c..f760dd2 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -4599,21 +4599,25 @@ static void tex_fetch_args( > for (param = 0; param < 2; param++) > for (chan = 0; chan < num_src_deriv_channels; chan++) > derivs[param * num_src_deriv_channels + chan] > = > lp_build_emit_fetch(bld_base, inst, > param+1, chan); > } > > if (target == TGSI_TEXTURE_CUBE || > 
target == TGSI_TEXTURE_CUBE_ARRAY || > target == TGSI_TEXTURE_SHADOWCUBE || > target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) > - si_prepare_cube_coords(bld_base, emit_data, coords, derivs); > + ac_prepare_cube_coords(&ctx->ac, > + opcode == TGSI_OPCODE_TXD, > + target == TGSI_TEXTURE_CUBE_ARRAY || > + target == > TGSI_TEXTURE_SHADOWCUBE_ARRAY, > + coords, derivs); > > if (opcode == TGSI_OPCODE_TXD) > for (int i = 0; i < num_deriv_channels * 2; i++) > address[count++] = derivs[i]; > > /* Pack texture coordinates */ > address[count++] = coords[0]; > if (num_coords > 1) > address[count++] = coords[1]; > if (num_coords > 2) > diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h > b/src/gallium/drivers/radeonsi/si_shader_internal.h > index 8d6a40b..6b3ac17 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_internal.h > +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h > @@ -21,40 +21,42 @@ > * USE OR OTHER DEALINGS IN THE SOFTWARE. > */ > > #ifndef SI_SHADER_PRIVATE_H > #define SI_SHADER_PRIVATE_H > > #include "si_shader.h" > #include "gallivm/lp_bld_init.h" > #include "gallivm/lp_bld_tgsi.h" > #include "tgsi/tgsi_parse.h" > +#include "ac_llvm_util.h" > > #include <llvm-c/Core.h> > #include <llvm-c/TargetMachine.h> > > struct pipe_debug_callback; > struct radeon_shader_binary; > > #define RADEON_LLVM_MAX_INPUT_SLOTS 32 > #define RADEON_LLVM_MAX_INPUTS 32 * 4 > #define RADEON_LLVM_MAX_OUTPUTS 32 * 4 > > #define RADEON_LLVM_INITIAL_CF_DEPTH 4 > > #define RADEON_LLVM_MAX_SYSTEM_VALUES 4 > > struct si_llvm_flow; > > struct si_shader_context { > struct lp_build_tgsi_soa_context soa; > struct gallivm_state gallivm; > + struct ac_llvm_context ac; > struct si_shader *shader; > struct si_screen *screen; > > unsigned type; /* PIPE_SHADER_* specifies the type of shader. */ > > /* Whether the prolog will be compiled separately. 
*/ > bool separate_prolog; > > /** This function is responsible for initilizing the inputs array and > will be > * called once for each input declared in the TGSI shader. > diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c > b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c > index 2f38949..4a919dd 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c > +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c > @@ -1245,20 +1245,24 @@ void si_llvm_context_init(struct si_shader_context > *ctx, > > ctx->gallivm.context = LLVMContextCreate(); > ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi", > ctx->gallivm.context); > LLVMSetTarget(ctx->gallivm.module, "amdgcn--"); > > bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0; > ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context, > unsafe_fpmath); > > + ac_llvm_context_init(&ctx->ac, ctx->gallivm.context); > + ctx->ac.module = ctx->gallivm.module; > + ctx->ac.builder = ctx->gallivm.builder; > + > struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; > > bld_base->info = info; > > if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) { > int size = info->array_max[TGSI_FILE_TEMPORARY]; > > ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0])); > ctx->temp_array_allocas = CALLOC(size, > sizeof(ctx->temp_array_allocas[0])); > > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev