From: Kenneth Graunke <kenn...@whitecape.org> ARB_enhanced_layouts enables us to pack array varyings with non-array types, e.g.:
layout(location = 0) in vec3 a[6]; layout(location = 0, component = 3) in float b; With this change we calculate the size of output registers in a separate pass, before allocating them. Reviewed-by: Timothy Arceri <tarc...@itsqueeze.com> --- src/intel/compiler/brw_fs_nir.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 425c52c9917..7ed44f534c0 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -53,14 +53,27 @@ fs_visitor::nir_setup_outputs() if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT) return; + unsigned vec4s[VARYING_SLOT_TESS_MAX] = { 0, }; + + /* Calculate the size of output registers in a separate pass, before + * allocating them. With ARB_enhanced_layouts, multiple output variables + * may occupy the same slot, but have different type sizes. + */ nir_foreach_variable(var, &nir->outputs) { - const unsigned vec4s = + const int loc = var->data.driver_location; + const unsigned var_vec4s = var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4) : type_size_vec4(var->type); - fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s); - for (unsigned i = 0; i < vec4s; i++) { - if (outputs[var->data.driver_location + i].file == BAD_FILE) - outputs[var->data.driver_location + i] = offset(reg, bld, 4 * i); + vec4s[loc] = MAX2(vec4s[loc], var_vec4s); + } + + nir_foreach_variable(var, &nir->outputs) { + const int loc = var->data.driver_location; + if (outputs[loc].file == BAD_FILE) { + fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s[loc]); + for (unsigned i = 0; i < vec4s[loc]; i++) { + outputs[loc + i] = offset(reg, bld, 4 * i); + } } } } -- 2.13.6 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev