v2: add to patch series Signed-off-by: Rhys Perry <pendingchao...@gmail.com> --- src/amd/common/ac_llvm_build.c | 33 +++++++++++++++++------- src/amd/common/ac_llvm_build.h | 3 ++- src/amd/common/ac_nir_to_llvm.c | 14 +++++++--- src/amd/vulkan/radv_nir_to_llvm.c | 27 ++++++++++++++----- src/amd/vulkan/radv_pipeline.c | 19 ++++++++------ src/amd/vulkan/radv_shader.h | 1 + src/gallium/drivers/radeonsi/si_shader.c | 2 +- 7 files changed, 69 insertions(+), 30 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index dff369aae7f..be2c2251a21 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -937,27 +937,40 @@ ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, - LLVMValueRef j) + LLVMValueRef j, + int word) { - LLVMValueRef args[5]; + LLVMValueRef args[6]; LLVMValueRef p1; args[0] = i; args[1] = llvm_chan; args[2] = attr_number; - args[3] = params; - - p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", - ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); + if (word >= 0) { + args[3] = LLVMConstInt(ctx->i1, word, false); + args[4] = params; + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", + ctx->f16, args, 5, AC_FUNC_ATTR_READNONE); + } else { + args[3] = params; + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", + ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); + } args[0] = p1; args[1] = j; args[2] = llvm_chan; args[3] = attr_number; - args[4] = params; - - return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", - ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); + if (word >= 0) { + args[4] = LLVMConstInt(ctx->i1, word, false); + args[5] = params; + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", + ctx->f16, args, 6, AC_FUNC_ATTR_READNONE); + } else { + args[4] = params; + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", + ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); + } } LLVMValueRef diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 61c9b5e4b6c..655427567c4 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -224,7 +224,8 @@ ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i, - LLVMValueRef j); + LLVMValueRef j, + int word); LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index bf7024c68e4..939b8eb13de 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3120,8 +3120,15 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx, LLVMValueRef j = LLVMBuildExtractElement( ctx->ac.builder, interp_param, ctx->ac.i32_1, ""); + /* This fp16 handling isn't technically correct + * but should be correct for the attributes we + * are actually going to use. */ + bool fp16 = instr->dest.ssa.bit_size == 16; + int word = fp16 ? 0 : -1; v = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, - ctx->abi->prim_mask, i, j); + ctx->abi->prim_mask, i, j, word); + if (fp16) + v = ac_build_reinterpret(&ctx->ac, v, ctx->ac.f32); } else { v = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false), llvm_chan, attr_number, ctx->abi->prim_mask); @@ -3134,8 +3141,9 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx, result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, attrib_idx, ""); } - return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components, - var->data.location_frac); + LLVMValueRef ret = ac_build_varying_gather_values(&ctx->ac, result, instr->num_components, + var->data.location_frac); + return ac_build_reinterpret(&ctx->ac, ret, get_def_type(ctx, &instr->dest.ssa)); } static void visit_intrinsic(struct ac_nir_context *ctx, diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index c46eabf3656..49f8d35dd5f 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2051,7 +2051,8 @@ static void interp_fs_input(struct radv_shader_context *ctx, unsigned attr, LLVMValueRef interp_param, LLVMValueRef prim_mask, - LLVMValueRef result[4]) + LLVMValueRef result[4], + bool fp16) { LLVMValueRef attr_number; unsigned chan; @@ -2086,7 +2087,10 @@ static void interp_fs_input(struct radv_shader_context *ctx, result[chan] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, - prim_mask, i, j); + prim_mask, i, j, + fp16 ? 0 : -1); + if (fp16) + result[chan] = ac_build_reinterpret(&ctx->ac, result[chan], ctx->ac.f16); } else { result[chan] = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false), @@ -2100,7 +2104,8 @@ static void interp_fs_input(struct radv_shader_context *ctx, static void handle_fs_input_decl(struct radv_shader_context *ctx, - struct nir_variable *variable) + struct nir_variable *variable, + uint64_t *fp16_mask) { int idx = variable->data.location; unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); @@ -2110,7 +2115,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx, variable->data.driver_location = idx * 4; mask = ((1ull << attrib_count) - 1) << variable->data.location; - if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { + enum glsl_base_type type = glsl_get_base_type(glsl_without_array(variable->type)); + if (type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_FLOAT16) { unsigned interp_type; if (variable->data.sample) interp_type = INTERP_SAMPLE; @@ -2120,6 +2126,9 @@ handle_fs_input_decl(struct radv_shader_context *ctx, interp_type = INTERP_CENTER; interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type); + + if (type == GLSL_TYPE_FLOAT16) + *fp16_mask |= mask; } for (unsigned i = 0; i < attrib_count; ++i) @@ -2173,8 +2182,9 @@ handle_fs_inputs(struct radv_shader_context *ctx, { prepare_interp_optimize(ctx, nir); + uint64_t fp16_mask = 0; nir_foreach_variable(variable, &nir->inputs) - handle_fs_input_decl(ctx, variable); + handle_fs_input_decl(ctx, variable, &fp16_mask); unsigned index = 0; @@ -2194,11 +2204,14 @@ handle_fs_inputs(struct radv_shader_context *ctx, if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC || i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) { interp_param = *inputs; + bool fp16 = fp16_mask & (1ull << i); interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, - inputs); + inputs, fp16); if (!interp_param) ctx->shader_info->fs.flat_shaded_mask |= 1u << index; + if (fp16) + ctx->shader_info->fs.fp16_mask |= 1u << index; if (i >= VARYING_SLOT_VAR0) ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index; ++index; @@ -2210,7 +2223,7 @@ handle_fs_inputs(struct radv_shader_context *ctx, interp_param = *inputs; interp_fs_input(ctx, index, interp_param, - ctx->abi.prim_mask, inputs); + ctx->abi.prim_mask, inputs, false); ++index; } } else if (i == VARYING_SLOT_POS) { diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index ab56a273a2c..a3260291bce 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3070,13 +3070,15 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader); } -static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade) +static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool fp16) { uint32_t ps_input_cntl; if (offset <= AC_EXP_PARAM_OFFSET_31) { ps_input_cntl = S_028644_OFFSET(offset); if (flat_shade) ps_input_cntl |= S_028644_FLAT_SHADE(1); + if (fp16 && !flat_shade) + ps_input_cntl |= S_028644_FP16_INTERP_MODE(1); } else { /* The input is a DEFAULT_VAL constant. */ assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && @@ -3101,7 +3103,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, if (ps->info.info.ps.prim_id_input) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false); ++ps_offset; } } @@ -3111,9 +3113,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, ps->info.info.needs_multiview_view_index) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false); else - ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true); + ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false); ++ps_offset; } @@ -3129,21 +3131,21 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false); ++ps_offset; } vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1]; if (vs_offset != AC_EXP_PARAM_UNDEFINED && ps->info.info.ps.num_input_clips_culls > 4) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false); ++ps_offset; } } for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) { unsigned vs_offset; - bool flat_shade; + bool flat_shade, fp16; if (!(ps->info.fs.input_mask & (1u << i))) continue; @@ -3155,8 +3157,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, } flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset)); + fp16 = !!(ps->info.fs.fp16_mask & (1u << ps_offset)); - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, fp16); ++ps_offset; } diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index b67cd2b4f15..f0e9bc249f9 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -257,6 +257,7 @@ struct radv_shader_variant_info { unsigned num_interp; uint32_t input_mask; uint32_t flat_shaded_mask; + uint32_t fp16_mask; bool can_discard; bool early_fragment_test; } fs; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index efae02ee91c..c1f82137020 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1751,7 +1751,7 @@ static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx, return ac_build_fs_interp(&ctx->ac, LLVMConstInt(ctx->i32, chan, 0), LLVMConstInt(ctx->i32, attr_index, 0), - prim_mask, i, j); + prim_mask, i, j, -1); } return ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->i32, 2, 0), /* P0 */ -- 2.20.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev