CTS doesn't test input clip/cull distances for the fragment
shader stage, which explains why this was totally broken. I
wrote a simple test locally that works now.
This fixes a crash with GTA V and DXVK.
Note that we are exporting unused parameters from the vertex
shader now, but this can't be optimized easily because we don't
keep the fragment shader info...
Cc: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107477
Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
src/amd/vulkan/radv_nir_to_llvm.c | 30 +++++++++++++++++++++++++++++-
src/amd/vulkan/radv_pipeline.c | 16 ++++++++++++++++
src/amd/vulkan/radv_shader.h | 1 +
src/amd/vulkan/radv_shader_info.c | 4 ++++
4 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c
b/src/amd/vulkan/radv_nir_to_llvm.c
index 4940e3230f..d7cd8cc069 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2098,9 +2098,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
int idx = variable->data.location;
unsigned attrib_count = glsl_count_attribute_slots(variable->type,
false);
LLVMValueRef interp = NULL;
+ uint64_t mask;
variable->data.driver_location = idx * 4;
- ctx->input_mask |= ((1ull << attrib_count) - 1) <<
variable->data.location;
+ mask = ((1ull << attrib_count) - 1) << variable->data.location;
if (glsl_get_base_type(glsl_without_array(variable->type)) ==
GLSL_TYPE_FLOAT) {
unsigned interp_type;
@@ -2121,6 +2122,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
for (unsigned i = 0; i < attrib_count; ++i)
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
+ if (idx == VARYING_SLOT_CLIP_DIST0) {
+ /* Do not account for the number of components inside the array
+ * of clip/cull distances because this might wrongly set other
+ * bits like primitive ID or layer.
+ */
+ mask = 1ull << VARYING_SLOT_CLIP_DIST0;
+ }
+
+ ctx->input_mask |= mask;
}
static void
@@ -2187,6 +2197,17 @@ handle_fs_inputs(struct radv_shader_context *ctx,
if (LLVMIsUndef(interp_param))
ctx->shader_info->fs.flat_shaded_mask |= 1u <<
index;
++index;
+ } else if (i == VARYING_SLOT_CLIP_DIST0) {
+ int length =
ctx->shader_info->info.ps.num_input_clips_culls;
+
+ for (unsigned j = 0; j < length; j += 4) {
+ inputs = ctx->inputs + ac_llvm_reg_index_soa(i,
j);
+
+ interp_param = *inputs;
+ interp_fs_input(ctx, index, interp_param,
+ ctx->abi.prim_mask, inputs);
+ ++index;
+ }
} else if (i == VARYING_SLOT_POS) {
for(int i = 0; i < 3; ++i)
inputs[i] = ctx->abi.frag_pos[i];
@@ -2482,6 +2503,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
&args, sizeof(args));
+ /* Export the clip/cull distances values to the next stage. */
+ radv_export_param(ctx, param_count, &slots[0], 0xf);
+ outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] =
param_count++;
+ if (ctx->num_output_clips + ctx->num_output_culls > 4) {
+ radv_export_param(ctx, param_count, &slots[4], 0xf);
+
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
+ }
}
LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0,
ctx->ac.f32_0, ctx->ac.f32_1};
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index e63c481d1e..0303642d7e 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3052,6 +3052,22 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf
*cs,
ps_offset++;
}
+ if (ps->info.info.ps.num_input_clips_culls) {
+ unsigned vs_offset;
+
+ vs_offset =
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+ ps_input_cntl[ps_offset] =
offset_to_ps_input(vs_offset, true);
+ ++ps_offset;
+ }
+
+ vs_offset =
outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED) {