--- src/amd/common/ac_nir_to_llvm.c | 68 ++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 35 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 45eb613579..736131ab56 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -127,21 +127,20 @@ struct nir_to_llvm_context { LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring; LLVMValueRef hs_ring_tess_offchip; LLVMValueRef hs_ring_tess_factor; LLVMValueRef prim_mask; LLVMValueRef sample_pos_offset; LLVMValueRef persp_sample, persp_center, persp_centroid; LLVMValueRef linear_sample, linear_center, linear_centroid; - LLVMTypeRef f32; LLVMTypeRef f16; LLVMTypeRef v2f32; LLVMTypeRef v4f32; unsigned uniform_md_kind; LLVMValueRef empty_md; gl_shader_stage stage; LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; @@ -798,22 +797,22 @@ static void create_function(struct nir_to_llvm_context *ctx, if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index)) add_user_sgpr_argument(&args, ctx->ac.i32, &ctx->view_index); if (ctx->options->key.tes.as_es) { add_sgpr_argument(&args, ctx->ac.i32, &ctx->oc_lds); // OC LDS add_sgpr_argument(&args, ctx->ac.i32, NULL); // add_sgpr_argument(&args, ctx->ac.i32, &ctx->es2gs_offset); // es2gs offset } else { add_sgpr_argument(&args, ctx->ac.i32, NULL); // add_sgpr_argument(&args, ctx->ac.i32, &ctx->oc_lds); // OC LDS } - add_vgpr_argument(&args, ctx->f32, &ctx->tes_u); // tes_u - add_vgpr_argument(&args, ctx->f32, &ctx->tes_v); // tes_v + add_vgpr_argument(&args, ctx->ac.f32, &ctx->tes_u); // tes_u + add_vgpr_argument(&args, ctx->ac.f32, &ctx->tes_v); // tes_v add_vgpr_argument(&args, ctx->ac.i32, &ctx->tes_rel_patch_id); // tes rel patch id add_vgpr_argument(&args, ctx->ac.i32, &ctx->tes_patch_id); // tes patch id break; case MESA_SHADER_GEOMETRY: if (has_previous_stage) { // First 6 system regs add_sgpr_argument(&args, ctx->ac.i32, &ctx->gs2vs_offset); // tess factor offset add_sgpr_argument(&args, ctx->ac.i32, &ctx->merged_wave_info); // merged wave info add_sgpr_argument(&args, ctx->ac.i32, &ctx->oc_lds); // param oc lds @@ -836,22 +835,22 @@ static void create_function(struct nir_to_llvm_context *ctx, add_vgpr_argument(&args, ctx->ac.i32, &ctx->gs_prim_id); // prim id add_vgpr_argument(&args, ctx->ac.i32, &ctx->gs_invocation_id); add_vgpr_argument(&args, ctx->ac.i32, &ctx->gs_vtx_offset[4]); if (previous_stage == MESA_SHADER_VERTEX) { add_vgpr_argument(&args, ctx->ac.i32, &ctx->abi.vertex_id); // vertex id add_vgpr_argument(&args, ctx->ac.i32, &ctx->rel_auto_id); // rel auto id add_vgpr_argument(&args, ctx->ac.i32, &ctx->vs_prim_id); // vs prim id add_vgpr_argument(&args, ctx->ac.i32, &ctx->abi.instance_id); // instance id } else { - add_vgpr_argument(&args, ctx->f32, &ctx->tes_u); // tes_u - add_vgpr_argument(&args, ctx->f32, &ctx->tes_v); // tes_v + add_vgpr_argument(&args, ctx->ac.f32, &ctx->tes_u); // tes_u + add_vgpr_argument(&args, ctx->ac.f32, &ctx->tes_v); // tes_v add_vgpr_argument(&args, ctx->ac.i32, &ctx->tes_rel_patch_id); // tes rel patch id add_vgpr_argument(&args, ctx->ac.i32, &ctx->tes_patch_id); // tes patch id } } else { radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets); radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args); add_user_sgpr_argument(&args, ctx->ac.i32, &ctx->gsvs_ring_stride); // gsvs stride add_user_sgpr_argument(&args, ctx->ac.i32, &ctx->gsvs_num_entries); // gsvs num entires if (ctx->shader_info->info.needs_multiview_view_index) add_user_sgpr_argument(&args, ctx->ac.i32, &ctx->view_index); @@ -872,25 +871,25 @@ static void create_function(struct nir_to_llvm_context *ctx, if (ctx->shader_info->info.ps.needs_sample_positions) add_user_sgpr_argument(&args, ctx->ac.i32, &ctx->sample_pos_offset); /* sample position offset */ add_sgpr_argument(&args, ctx->ac.i32, &ctx->prim_mask); /* prim mask */ add_vgpr_argument(&args, ctx->ac.v2i32, &ctx->persp_sample); /* persp sample */ add_vgpr_argument(&args, ctx->ac.v2i32, &ctx->persp_center); /* persp center */ add_vgpr_argument(&args, ctx->ac.v2i32, &ctx->persp_centroid); /* persp centroid */ add_vgpr_argument(&args, ctx->ac.v3i32, NULL); /* persp pull model */ add_vgpr_argument(&args, ctx->ac.v2i32, &ctx->linear_sample); /* linear sample */ add_vgpr_argument(&args, ctx->ac.v2i32, &ctx->linear_center); /* linear center */ add_vgpr_argument(&args, ctx->ac.v2i32, &ctx->linear_centroid); /* linear centroid */ - add_vgpr_argument(&args, ctx->f32, NULL); /* line stipple tex */ - add_vgpr_argument(&args, ctx->f32, &ctx->abi.frag_pos[0]); /* pos x float */ - add_vgpr_argument(&args, ctx->f32, &ctx->abi.frag_pos[1]); /* pos y float */ - add_vgpr_argument(&args, ctx->f32, &ctx->abi.frag_pos[2]); /* pos z float */ - add_vgpr_argument(&args, ctx->f32, &ctx->abi.frag_pos[3]); /* pos w float */ + add_vgpr_argument(&args, ctx->ac.f32, NULL); /* line stipple tex */ + add_vgpr_argument(&args, ctx->ac.f32, &ctx->abi.frag_pos[0]); /* pos x float */ + add_vgpr_argument(&args, ctx->ac.f32, &ctx->abi.frag_pos[1]); /* pos y float */ + add_vgpr_argument(&args, ctx->ac.f32, &ctx->abi.frag_pos[2]); /* pos z float */ + add_vgpr_argument(&args, ctx->ac.f32, &ctx->abi.frag_pos[3]); /* pos w float */ add_vgpr_argument(&args, ctx->ac.i32, &ctx->abi.front_face); /* front face */ add_vgpr_argument(&args, ctx->ac.i32, &ctx->abi.ancillary); /* ancillary */ add_vgpr_argument(&args, ctx->ac.i32, &ctx->abi.sample_coverage); /* sample coverage */ add_vgpr_argument(&args, ctx->ac.i32, NULL); /* fixed pt */ break; default: unreachable("Shader stage not implemented"); } ctx->main_function = create_llvm_function( @@ -980,24 +979,23 @@ static void create_function(struct nir_to_llvm_context *ctx, break; default: unreachable("Shader stage not implemented"); } ctx->shader_info->num_user_sgprs = user_sgpr_idx; } static void setup_types(struct nir_to_llvm_context *ctx) { - ctx->f32 = LLVMFloatTypeInContext(ctx->context); ctx->f16 = LLVMHalfTypeInContext(ctx->context); - ctx->v2f32 = LLVMVectorType(ctx->f32, 2); - ctx->v4f32 = LLVMVectorType(ctx->f32, 4); + ctx->v2f32 = LLVMVectorType(ctx->ac.f32, 2); + ctx->v4f32 = LLVMVectorType(ctx->ac.f32, 4); ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14); ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); } static int get_llvm_num_components(LLVMValueRef value) { LLVMTypeRef type = LLVMTypeOf(value); unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind @@ -1320,34 +1318,34 @@ static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx, if (ctx->options->chip_class >= VI) { LLVMValueRef args[2]; /* Check if the result is a denormal - and flush to 0 if so. */ args[0] = result; args[1] = LLVMConstInt(ctx->ac.i32, N_SUBNORMAL | P_SUBNORMAL, false); cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->ac.i1, args, 2, AC_FUNC_ATTR_READNONE); } /* need to convert back up to f32 */ - result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); + result = LLVMBuildFPExt(ctx->builder, result, ctx->ac.f32, ""); if (ctx->options->chip_class >= VI) result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, ""); else { /* for SI/CIK */ /* 0x38800000 is smallest half float value (2^-14) in 32-bit float, * so compare the result and flush to 0 if it's smaller. */ LLVMValueRef temp, cond2; temp = emit_intrin_1f_param(&ctx->ac, "llvm.fabs", - ctx->f32, result); + ctx->ac.f32, result); cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT, - LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->ac.i32, 0x38800000, false), ctx->f32, ""), + LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->ac.i32, 0x38800000, false), ctx->ac.f32, ""), temp, ""); cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, temp, ctx->ac.f32_0, ""); cond = LLVMBuildAnd(ctx->builder, cond, cond2, ""); result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, ""); } return result; } static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, @@ -3846,21 +3844,21 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, break; default: break; } if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) { src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_0, "")); src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_1, "")); } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) { LLVMValueRef sample_position; - LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f); + LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f); /* fetch sample ID */ sample_position = load_sample_position(ctx, src0); src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_0, ""); src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, ""); src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_1, ""); src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, ""); } interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location); @@ -3883,41 +3881,41 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false); LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder, ddxy_out, ix_ll, ""); LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder, ddxy_out, iy_ll, ""); LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder, interp_param, ix_ll, ""); LLVMValueRef temp1, temp2; interp_el = LLVMBuildBitCast(ctx->builder, interp_el, - ctx->f32, ""); + ctx->ac.f32, ""); temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, ""); temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, ""); temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, ""); temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, ""); ij_out[i] = LLVMBuildBitCast(ctx->builder, temp2, ctx->ac.i32, ""); } interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); } for (chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); if (interp_param) { interp_param = LLVMBuildBitCast(ctx->builder, - interp_param, LLVMVectorType(ctx->f32, 2), ""); + interp_param, LLVMVectorType(ctx->ac.f32, 2), ""); LLVMValueRef i = LLVMBuildExtractElement( ctx->builder, interp_param, ctx->ac.i32_0, ""); LLVMValueRef j = LLVMBuildExtractElement( ctx->builder, interp_param, ctx->ac.i32_1, ""); result[chan] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, ctx->prim_mask, i, j); } else { result[chan] = ac_build_fs_interp_mov(&ctx->ac, @@ -5042,21 +5040,21 @@ static void interp_fs_input(struct nir_to_llvm_context *ctx, * interpolation (but the intrinsic can't fetch from the other two * vertices). * * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state * to do the right thing. The only reason we use fs.constant is that * fs.interp cannot be used on integers, because they can be equal * to NaN. */ if (interp) { interp_param = LLVMBuildBitCast(ctx->builder, interp_param, - LLVMVectorType(ctx->f32, 2), ""); + LLVMVectorType(ctx->ac.f32, 2), ""); i = LLVMBuildExtractElement(ctx->builder, interp_param, ctx->ac.i32_0, ""); j = LLVMBuildExtractElement(ctx->builder, interp_param, ctx->ac.i32_1, ""); } for (chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); @@ -5309,21 +5307,21 @@ static LLVMTypeRef glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx, enum glsl_base_type type) { switch (type) { case GLSL_TYPE_INT: case GLSL_TYPE_UINT: case GLSL_TYPE_BOOL: case GLSL_TYPE_SUBROUTINE: return ctx->ac.i32; case GLSL_TYPE_FLOAT: /* TODO handle mediump */ - return ctx->f32; + return ctx->ac.f32; case GLSL_TYPE_INT64: case GLSL_TYPE_UINT64: return ctx->ac.i64; case GLSL_TYPE_DOUBLE: return ctx->ac.f64; default: unreachable("unknown GLSL type"); } } @@ -5438,24 +5436,24 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, /* Specify whether the EXEC mask represents the valid mask */ args->valid_mask = 0; /* Specify whether this is the last export */ args->done = 0; /* Specify the target we are exporting */ args->target = target; args->compr = false; - args->out[0] = LLVMGetUndef(ctx->f32); - args->out[1] = LLVMGetUndef(ctx->f32); - args->out[2] = LLVMGetUndef(ctx->f32); - args->out[3] = LLVMGetUndef(ctx->f32); + args->out[0] = LLVMGetUndef(ctx->ac.f32); + args->out[1] = LLVMGetUndef(ctx->ac.f32); + args->out[2] = LLVMGetUndef(ctx->ac.f32); + args->out[3] = LLVMGetUndef(ctx->ac.f32); if (!values) return; if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) { LLVMValueRef val[4]; unsigned index = target - V_008DFC_SQ_EXP_MRT; unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf; bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1; bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1; @@ -5495,45 +5493,45 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args); args->out[chan] = packed; } break; case V_028714_SPI_SHADER_UNORM16_ABGR: for (unsigned chan = 0; chan < 4; chan++) { val[chan] = ac_build_clamp(&ctx->ac, values[chan]); val[chan] = LLVMBuildFMul(ctx->builder, val[chan], - LLVMConstReal(ctx->f32, 65535), ""); + LLVMConstReal(ctx->ac.f32, 65535), ""); val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], - LLVMConstReal(ctx->f32, 0.5), ""); + LLVMConstReal(ctx->ac.f32, 0.5), ""); val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan], ctx->ac.i32, ""); } args->compr = 1; args->out[0] = emit_pack_int16(ctx, val[0], val[1]); args->out[1] = emit_pack_int16(ctx, val[2], val[3]); break; case V_028714_SPI_SHADER_SNORM16_ABGR: for (unsigned chan = 0; chan < 4; chan++) { val[chan] = emit_float_saturate(&ctx->ac, values[chan], -1, 1); val[chan] = LLVMBuildFMul(ctx->builder, val[chan], - LLVMConstReal(ctx->f32, 32767), ""); + LLVMConstReal(ctx->ac.f32, 32767), ""); /* If positive, add 0.5, else add -0.5. */ val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], LLVMBuildSelect(ctx->builder, LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val[chan], ctx->ac.f32_0, ""), - LLVMConstReal(ctx->f32, 0.5), - LLVMConstReal(ctx->f32, -0.5), ""), ""); + LLVMConstReal(ctx->ac.f32, 0.5), + LLVMConstReal(ctx->ac.f32, -0.5), ""), ""); val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->ac.i32, ""); } args->compr = 1; args->out[0] = emit_pack_int16(ctx, val[0], val[1]); args->out[1] = emit_pack_int16(ctx, val[2], val[3]); break; case V_028714_SPI_SHADER_UINT16_ABGR: { LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32, @@ -5617,21 +5615,21 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, if (outinfo->cull_dist_mask) outinfo->cull_dist_mask <<= ctx->num_output_clips; i = VARYING_SLOT_CLIP_DIST0; for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++) slots[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++) - slots[i] = LLVMGetUndef(ctx->f32); + slots[i] = LLVMGetUndef(ctx->ac.f32); if (ctx->num_output_clips + ctx->num_output_culls > 4) { target = V_008DFC_SQ_EXP_POS + 3; si_llvm_init_export_args(ctx, &slots[4], target, &args); memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], &args, sizeof(args)); } target = V_008DFC_SQ_EXP_POS + 2; si_llvm_init_export_args(ctx, &slots[0], target, &args); @@ -6151,24 +6149,24 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx, LLVMValueRef samplemask) { struct ac_export_args args; args.enabled_channels = 0; args.valid_mask = 1; args.done = 1; args.target = V_008DFC_SQ_EXP_MRTZ; args.compr = false; - args.out[0] = LLVMGetUndef(ctx->f32); /* R, depth */ - args.out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */ - args.out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */ - args.out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */ + args.out[0] = LLVMGetUndef(ctx->ac.f32); /* R, depth */ + args.out[1] = LLVMGetUndef(ctx->ac.f32); /* G, stencil test val[0:7], stencil op val[8:15] */ + args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */ + args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */ if (depth) { args.out[0] = depth; args.enabled_channels |= 0x1; } if (stencil) { args.out[1] = stencil; args.enabled_channels |= 0x2; } -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev