Please ignore this patch. I forgot to delete it, so it was sent by mistake. The correct patch is "[PATCH v2 37/41] WIP: radv, ac: implement 16-bit interpolation".
On Sat, 16 Feb 2019 at 00:23, Rhys Perry <pendingchao...@gmail.com> wrote: > > v2: add to patch series > > Signed-off-by: Rhys Perry <pendingchao...@gmail.com> > --- > src/amd/common/ac_llvm_build.c | 33 +++++++++++++++++------- > src/amd/common/ac_llvm_build.h | 3 ++- > src/amd/common/ac_nir_to_llvm.c | 14 +++++++--- > src/amd/vulkan/radv_nir_to_llvm.c | 27 ++++++++++++++----- > src/amd/vulkan/radv_pipeline.c | 19 ++++++++------ > src/amd/vulkan/radv_shader.h | 1 + > src/gallium/drivers/radeonsi/si_shader.c | 2 +- > 7 files changed, 69 insertions(+), 30 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index dff369aae7f..be2c2251a21 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -937,27 +937,40 @@ ac_build_fs_interp(struct ac_llvm_context *ctx, > LLVMValueRef attr_number, > LLVMValueRef params, > LLVMValueRef i, > - LLVMValueRef j) > + LLVMValueRef j, > + int word) > { > - LLVMValueRef args[5]; > + LLVMValueRef args[6]; > LLVMValueRef p1; > > args[0] = i; > args[1] = llvm_chan; > args[2] = attr_number; > - args[3] = params; > - > - p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", > - ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); > + if (word >= 0) { > + args[3] = LLVMConstInt(ctx->i1, word, false); > + args[4] = params; > + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", > + ctx->f16, args, 5, > AC_FUNC_ATTR_READNONE); > + } else { > + args[3] = params; > + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1", > + ctx->f32, args, 4, > AC_FUNC_ATTR_READNONE); > + } > > args[0] = p1; > args[1] = j; > args[2] = llvm_chan; > args[3] = attr_number; > - args[4] = params; > - > - return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", > - ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); > + if (word >= 0) { > + args[4] = LLVMConstInt(ctx->i1, word, false); > + args[5] = params; > + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", > + ctx->f16, args, 6, > 
AC_FUNC_ATTR_READNONE); > + } else { > + args[4] = params; > + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2", > + ctx->f32, args, 5, > AC_FUNC_ATTR_READNONE); > + } > } > > LLVMValueRef > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h > index 61c9b5e4b6c..655427567c4 100644 > --- a/src/amd/common/ac_llvm_build.h > +++ b/src/amd/common/ac_llvm_build.h > @@ -224,7 +224,8 @@ ac_build_fs_interp(struct ac_llvm_context *ctx, > LLVMValueRef attr_number, > LLVMValueRef params, > LLVMValueRef i, > - LLVMValueRef j); > + LLVMValueRef j, > + int word); > > LLVMValueRef > ac_build_fs_interp_mov(struct ac_llvm_context *ctx, > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index bf7024c68e4..939b8eb13de 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -3120,8 +3120,15 @@ static LLVMValueRef visit_interp(struct ac_nir_context > *ctx, > LLVMValueRef j = LLVMBuildExtractElement( > ctx->ac.builder, interp_param, > ctx->ac.i32_1, ""); > > + /* This fp16 handling isn't technically > correct > + * but should be correct for the attributes we > + * are actually going to use. */ > + bool fp16 = instr->dest.ssa.bit_size == 16; > + int word = fp16 ? 
0 : -1; > v = ac_build_fs_interp(&ctx->ac, llvm_chan, > attr_number, > - ctx->abi->prim_mask, > i, j); > + ctx->abi->prim_mask, > i, j, word); > + if (fp16) > + v = ac_build_reinterpret(&ctx->ac, v, > ctx->ac.f32); > } else { > v = ac_build_fs_interp_mov(&ctx->ac, > LLVMConstInt(ctx->ac.i32, 2, false), > llvm_chan, > attr_number, ctx->abi->prim_mask); > @@ -3134,8 +3141,9 @@ static LLVMValueRef visit_interp(struct ac_nir_context > *ctx, > result[chan] = LLVMBuildExtractElement(ctx->ac.builder, > gather, attrib_idx, ""); > > } > - return ac_build_varying_gather_values(&ctx->ac, result, > instr->num_components, > - var->data.location_frac); > + LLVMValueRef ret = ac_build_varying_gather_values(&ctx->ac, result, > instr->num_components, > + > var->data.location_frac); > + return ac_build_reinterpret(&ctx->ac, ret, get_def_type(ctx, > &instr->dest.ssa)); > } > > static void visit_intrinsic(struct ac_nir_context *ctx, > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > b/src/amd/vulkan/radv_nir_to_llvm.c > index c46eabf3656..49f8d35dd5f 100644 > --- a/src/amd/vulkan/radv_nir_to_llvm.c > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > @@ -2051,7 +2051,8 @@ static void interp_fs_input(struct radv_shader_context > *ctx, > unsigned attr, > LLVMValueRef interp_param, > LLVMValueRef prim_mask, > - LLVMValueRef result[4]) > + LLVMValueRef result[4], > + bool fp16) > { > LLVMValueRef attr_number; > unsigned chan; > @@ -2086,7 +2087,10 @@ static void interp_fs_input(struct radv_shader_context > *ctx, > result[chan] = ac_build_fs_interp(&ctx->ac, > llvm_chan, > attr_number, > - prim_mask, i, j); > + prim_mask, i, j, > + fp16 ? 
0 : -1); > + if (fp16) > + result[chan] = ac_build_reinterpret(&ctx->ac, > result[chan], ctx->ac.f16); > } else { > result[chan] = ac_build_fs_interp_mov(&ctx->ac, > > LLVMConstInt(ctx->ac.i32, 2, false), > @@ -2100,7 +2104,8 @@ static void interp_fs_input(struct radv_shader_context > *ctx, > > static void > handle_fs_input_decl(struct radv_shader_context *ctx, > - struct nir_variable *variable) > + struct nir_variable *variable, > + uint64_t *fp16_mask) > { > int idx = variable->data.location; > unsigned attrib_count = glsl_count_attribute_slots(variable->type, > false); > @@ -2110,7 +2115,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx, > variable->data.driver_location = idx * 4; > mask = ((1ull << attrib_count) - 1) << variable->data.location; > > - if (glsl_get_base_type(glsl_without_array(variable->type)) == > GLSL_TYPE_FLOAT) { > + enum glsl_base_type type = > glsl_get_base_type(glsl_without_array(variable->type)); > + if (type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_FLOAT16) { > unsigned interp_type; > if (variable->data.sample) > interp_type = INTERP_SAMPLE; > @@ -2120,6 +2126,9 @@ handle_fs_input_decl(struct radv_shader_context *ctx, > interp_type = INTERP_CENTER; > > interp = lookup_interp_param(&ctx->abi, > variable->data.interpolation, interp_type); > + > + if (type == GLSL_TYPE_FLOAT16) > + *fp16_mask |= mask; > } > > for (unsigned i = 0; i < attrib_count; ++i) > @@ -2173,8 +2182,9 @@ handle_fs_inputs(struct radv_shader_context *ctx, > { > prepare_interp_optimize(ctx, nir); > > + uint64_t fp16_mask = 0; > nir_foreach_variable(variable, &nir->inputs) > - handle_fs_input_decl(ctx, variable); > + handle_fs_input_decl(ctx, variable, &fp16_mask); > > unsigned index = 0; > > @@ -2194,11 +2204,14 @@ handle_fs_inputs(struct radv_shader_context *ctx, > if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC || > i == VARYING_SLOT_PRIMITIVE_ID || i == > VARYING_SLOT_LAYER) { > interp_param = *inputs; > + bool fp16 = fp16_mask & (1ull << i); > 
interp_fs_input(ctx, index, interp_param, > ctx->abi.prim_mask, > - inputs); > + inputs, fp16); > > if (!interp_param) > ctx->shader_info->fs.flat_shaded_mask |= 1u > << index; > + if (fp16) > + ctx->shader_info->fs.fp16_mask |= 1u << index; > if (i >= VARYING_SLOT_VAR0) > ctx->abi.fs_input_attr_indices[i - > VARYING_SLOT_VAR0] = index; > ++index; > @@ -2210,7 +2223,7 @@ handle_fs_inputs(struct radv_shader_context *ctx, > > interp_param = *inputs; > interp_fs_input(ctx, index, interp_param, > - ctx->abi.prim_mask, inputs); > + ctx->abi.prim_mask, inputs, > false); > ++index; > } > } else if (i == VARYING_SLOT_POS) { > diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c > index ab56a273a2c..a3260291bce 100644 > --- a/src/amd/vulkan/radv_pipeline.c > +++ b/src/amd/vulkan/radv_pipeline.c > @@ -3070,13 +3070,15 @@ radv_pipeline_generate_geometry_shader(struct > radeon_cmdbuf *ctx_cs, > radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, > pipeline->gs_copy_shader); > } > > -static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade) > +static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool > fp16) > { > uint32_t ps_input_cntl; > if (offset <= AC_EXP_PARAM_OFFSET_31) { > ps_input_cntl = S_028644_OFFSET(offset); > if (flat_shade) > ps_input_cntl |= S_028644_FLAT_SHADE(1); > + if (fp16 && !flat_shade) > + ps_input_cntl |= S_028644_FP16_INTERP_MODE(1); > } else { > /* The input is a DEFAULT_VAL constant. 
*/ > assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && > @@ -3101,7 +3103,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf > *ctx_cs, > if (ps->info.info.ps.prim_id_input) { > unsigned vs_offset = > outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID]; > if (vs_offset != AC_EXP_PARAM_UNDEFINED) { > - ps_input_cntl[ps_offset] = > offset_to_ps_input(vs_offset, true); > + ps_input_cntl[ps_offset] = > offset_to_ps_input(vs_offset, true, false); > ++ps_offset; > } > } > @@ -3111,9 +3113,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf > *ctx_cs, > ps->info.info.needs_multiview_view_index) { > unsigned vs_offset = > outinfo->vs_output_param_offset[VARYING_SLOT_LAYER]; > if (vs_offset != AC_EXP_PARAM_UNDEFINED) > - ps_input_cntl[ps_offset] = > offset_to_ps_input(vs_offset, true); > + ps_input_cntl[ps_offset] = > offset_to_ps_input(vs_offset, true, false); > else > - ps_input_cntl[ps_offset] = > offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true); > + ps_input_cntl[ps_offset] = > offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false); > ++ps_offset; > } > > @@ -3129,21 +3131,21 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf > *ctx_cs, > > vs_offset = > outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; > if (vs_offset != AC_EXP_PARAM_UNDEFINED) { > - ps_input_cntl[ps_offset] = > offset_to_ps_input(vs_offset, false); > + ps_input_cntl[ps_offset] = > offset_to_ps_input(vs_offset, false, false); > ++ps_offset; > } > > vs_offset = > outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1]; > if (vs_offset != AC_EXP_PARAM_UNDEFINED && > ps->info.info.ps.num_input_clips_culls > 4) { > - ps_input_cntl[ps_offset] = > offset_to_ps_input(vs_offset, false); > + ps_input_cntl[ps_offset] = > offset_to_ps_input(vs_offset, false, false); > ++ps_offset; > } > } > > for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; > ++i) { > unsigned vs_offset; > - bool flat_shade; > + bool flat_shade, fp16; > if 
(!(ps->info.fs.input_mask & (1u << i))) > continue; > > @@ -3155,8 +3157,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf > *ctx_cs, > } > > flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << > ps_offset)); > + fp16 = !!(ps->info.fs.fp16_mask & (1u << ps_offset)); > > - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, > flat_shade); > + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, > flat_shade, fp16); > ++ps_offset; > } > > diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h > index b67cd2b4f15..f0e9bc249f9 100644 > --- a/src/amd/vulkan/radv_shader.h > +++ b/src/amd/vulkan/radv_shader.h > @@ -257,6 +257,7 @@ struct radv_shader_variant_info { > unsigned num_interp; > uint32_t input_mask; > uint32_t flat_shaded_mask; > + uint32_t fp16_mask; > bool can_discard; > bool early_fragment_test; > } fs; > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index efae02ee91c..c1f82137020 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -1751,7 +1751,7 @@ static LLVMValueRef si_build_fs_interp(struct > si_shader_context *ctx, > return ac_build_fs_interp(&ctx->ac, > LLVMConstInt(ctx->i32, chan, 0), > LLVMConstInt(ctx->i32, attr_index, > 0), > - prim_mask, i, j); > + prim_mask, i, j, -1); > } > return ac_build_fs_interp_mov(&ctx->ac, > LLVMConstInt(ctx->i32, 2, 0), /* P0 */ > -- > 2.20.1 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev