v2: Adds the others packing heuristic v3: update enum name --- src/glsl/linker.cpp | 514 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 514 insertions(+), 0 deletions(-)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 707e645..3f8ef99 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1831,6 +1831,512 @@ public: } }; +namespace pack { + +/** + * Varyings packing can be seen as an instance of the bin packing problem. + * It is a NP hard problem in general. + * + * ES 2.0 specs gives (GLSL_ES_Specification_1.0.17-3.pdf, p111) an algorithm + * that specifies a minimum requirement for when a set of varyings must be + * supported. + * We almost follow the algorithm : the target architecture should be a + * 8x4 grid, we use a {number_of_available_reg}x4 grid. + */ +class buffer_representation +{ +protected: + class delayed_location_assignment : public exec_node + { + public: + ir_variable *producer; + ir_variable *consumer; + unsigned location; + unsigned horizontal_location; + delayed_location_assignment(ir_variable *p, ir_variable *c) + : producer(p), consumer(c) + { + } + + bool is_before(const delayed_location_assignment *comparee) const + { + unsigned local_size = (producer->type->is_array())?producer->type->length:1; + unsigned comparee_size = (comparee->producer->type->is_array())?comparee->producer->type->length:1; + return local_size >= comparee_size; + } + + void set_location_to_vars(unsigned producer_offset, unsigned consumer_offset, unsigned location, unsigned horizontal_location) + { + producer->location = producer_offset + location; + producer->horizontal_location = horizontal_location; + if (consumer) { + consumer->location = consumer_offset + location; + consumer->horizontal_location = horizontal_location; + } + } + + }; + + void *ctx; + unsigned number_of_regs; + bool * is_occupied; + unsigned occupied_space_in_column[4]; + unsigned producer_offset, consumer_offset; + exec_list *stored_vars[8]; + + +#define ARGMAX(i, j) (occupied_space_in_column[(i)] >= occupied_space_in_column[(j)])?(i):(j) +#define ARGMIN(i, j) (occupied_space_in_column[(i)] >= occupied_space_in_column[(j)])?(j):(i) + + /** + * Order index from most occupied column index to least occupied column index + */ + void order(unsigned reordered[4]) const + { + unsigned mx1 = ARGMAX(0, 1), mx2 = ARGMAX(2, 3); + unsigned mn1 = ARGMIN(0, 1), mn2 = ARGMIN(2, 3); + + reordered[0] = ARGMAX(mx1, mx2); + unsigned semi_max = ARGMIN(mx1, mx2); + reordered[3] = ARGMIN(mn1, mn2); + unsigned semi_min = ARGMAX(mn1, mn2); + reordered[1] = ARGMAX(semi_max, semi_min); + reordered[2] = ARGMIN(semi_min, semi_max); + } + +#undef ARGMAX +#undef ARGMIN + + INLINE + bool& is_occupied_ref(unsigned index, unsigned components) + { + assert (components < 4 && index < number_of_regs); + return is_occupied[4 * index + components]; + } + + bool is_range_free(unsigned reg, unsigned component_start, unsigned row_occupied, unsigned components_occupied) + { + + if (component_start + components_occupied - 1 > 3) + return false; + if (reg + row_occupied - 1 > number_of_regs - 1) + return false; + for (unsigned j = 0; j < row_occupied; j++) { + for (unsigned i = 0; i < components_occupied; i++) { + if (is_occupied_ref(reg + j, component_start + i)) + return false; + } + } + return true; + } + + void mark_as_occupied(unsigned location, unsigned horizontal_location, unsigned size, unsigned components_occupied) + { + for (unsigned j = 0; j < size; j++) { + for (unsigned i = 0; i < components_occupied; i++) { + is_occupied_ref(location + j, horizontal_location + i) = true; + occupied_space_in_column[horizontal_location + i] ++; + } + } + } + + void insert(delayed_location_assignment *dla, const glsl_type *const type) + { + if (type->is_array()) { + insert(dla, type->fields.array); + } + + // MAT4 + if (type->is_matrix() && type->vector_elements == 4) { + insert_largest_first(stored_vars[1], dla); + return; + } + + // MAT2 + if (type->is_matrix() && type->vector_elements == 2) { + insert_largest_first(stored_vars[2], dla); + return; + } + + // {I,B,}VEC4 + if (type->is_vector() && type->vector_elements == 4) { + insert_largest_first(stored_vars[3], dla); + return; + } + + // MAT3 + if (type->is_matrix() && type->vector_elements == 3) { + insert_largest_first(stored_vars[4], dla); + return; + } + + // {I,B,}VEC3 + if (type->is_vector() && type->vector_elements == 3) { + insert_largest_first(stored_vars[5], dla); + return; + } + + // {I,B,}VEC2 + if (type->is_vector() && type->vector_elements == 2) { + insert_largest_first(stored_vars[6], dla); + return; + } + + // float, int + if (type->is_scalar()) { + insert_largest_first(stored_vars[7], dla); + return; + } + + assert(0 && "varying type not supported"); + return; + } + + void + insert_largest_first(exec_list *lst, delayed_location_assignment *dla) + { + if (lst->is_empty()) { + lst->push_tail(dla); + return; + } + foreach_list(node, lst) { + delayed_location_assignment *dla_from_lst = (class delayed_location_assignment *) node; + if (dla->is_before(dla_from_lst)) { + dla_from_lst->insert_before(dla); + return; + } + + } + lst->push_tail(dla); + } + + void space_occupied (const glsl_type *const type, unsigned &components, unsigned &size) + { + if (type->is_array()) { + space_occupied(type->fields.array, components, size); + size *= type->length; + return; + } + + if (type == glsl_type::mat2_type) { + components = 4; + size = 2; + return; + } + + components = type->vector_elements; + size = type->matrix_columns; + } + + /** + * "For 2,3 and 4 component variables packing is started using the 1st column of the 1st row. + * Variables are then allocated to successive rows, aligning them to the 1st column" + * (ES Spec) + */ + void phase1() + { + unsigned i, current_row = 0; + for (i = 1; i < 8; i++) { + foreach_list_safe (node, stored_vars[i]) { + delayed_location_assignment *dla = (class delayed_location_assignment *) node; + unsigned components, size; + space_occupied(dla->producer->type, components, size); + if (is_range_free(current_row, 0, size, components)) { + dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 0); + mark_as_occupied(current_row, 0, size, components); + dla->remove(); + current_row += size; + } else { + return; + } + } + } + } + + /** + * "For 2 component variables, when there are no spare rows, the strategy is switched to using the + * highest numbered row and the lowest numbered column where the variable will fit. " + * (ES Spec) + */ + void phase2() + { + int current_row = number_of_regs - 1; + foreach_list_safe (node, stored_vars[6]) { + delayed_location_assignment *dla = (class delayed_location_assignment *) node; + unsigned components, size; + space_occupied(dla->producer->type, components, size); + while (current_row >= 0) { + if (is_range_free(current_row, 0, size, 2)) { + dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 0); + mark_as_occupied(current_row, 0, size, 2); + dla->remove(); + current_row -= size; + break; + } else if (is_range_free(current_row, 2, size, 2)) { + dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 2); + mark_as_occupied(current_row, 2, size, 2); + dla->remove(); + current_row -= size; + break; + } else { + current_row --; + } + } + if (current_row < 0) + return; + } + } + + /** + * "[1 Component variables] are packed in order of size, largest first. + * Each variable is placed in the column that leaves the least + * amount of space in the column and aligned to the lowest available rows within that column." + * (ES Spec) + */ + void phase3() + { + foreach_list_safe (node, stored_vars[7]) { + delayed_location_assignment *dla = (class delayed_location_assignment *) node; + unsigned components, size; + space_occupied(dla->producer->type, components, size); + unsigned reordered_index[4]; + // If we are here, no more space in first column + order(reordered_index); + for (unsigned i = 0; i < 4; i++) { + unsigned free_space = number_of_regs - occupied_space_in_column[reordered_index[i]]; + if (size > free_space) + continue; + else { + // Find first free row ; empty space is ensured to be contiguous + unsigned first_free_row = 0; + while (!is_range_free(first_free_row,reordered_index[i],1,1)) + first_free_row ++ ; + dla->set_location_to_vars(producer_offset, consumer_offset, first_free_row, reordered_index[i]); + mark_as_occupied(first_free_row, reordered_index[i], size, 1); + dla->remove(); + break; + } + } + } + } + + static void report_error(gl_shader_program *prog, exec_list *failing_varyings) + { + foreach_list_const (node, failing_varyings) { + const delayed_location_assignment *dla = (delayed_location_assignment *) node; + linker_error(prog, "Fails to find sufficient resources for varying %s\n", + dla->producer->name); + } + } + +public: + buffer_representation(unsigned p_offset, unsigned c_offset, unsigned n) + : ctx(ralloc_context(NULL)), number_of_regs(n), producer_offset(p_offset), consumer_offset(c_offset) + { + is_occupied = rzalloc_array(ctx, bool, 4 * number_of_regs); + for (unsigned i = 0; i < 8; i++) { + stored_vars[i] = new (ctx) exec_list(); + } + memset(occupied_space_in_column, 0, sizeof(occupied_space_in_column)); + } + + ~buffer_representation() + { + ralloc_free(ctx); + } + + void insert(ir_variable *producer, ir_variable *consumer) + { + assert(producer); + if (consumer) + assert(producer->type == consumer->type); + + delayed_location_assignment *dla = new (ctx) delayed_location_assignment(producer, consumer); + insert(dla, producer->type); + } + + // Note : We need prog to report errors + bool try_pack(gl_shader_program *prog) + { + phase1(); + if (!stored_vars[1]->is_empty() || !stored_vars[2]->is_empty() || !stored_vars[3]->is_empty() || + !stored_vars[4]->is_empty() || !stored_vars[5]->is_empty()) { + + /** + * "For 2 component variables, when there are no spare rows, ... + * Packing of any further 3 or 4 component variables will fail at this point." + * (ES Spec) + */ + report_error(prog, stored_vars[1]); + report_error(prog, stored_vars[2]); + report_error(prog, stored_vars[3]); + report_error(prog, stored_vars[3]); + report_error(prog, stored_vars[4]); + report_error(prog, stored_vars[5]); + return false; + + } + + phase2(); + if (!stored_vars[6]->is_empty()) { + report_error(prog, stored_vars[6]); + return false; + } + phase3(); + if (!stored_vars[7]->is_empty()) { + report_error(prog, stored_vars[7]); + return false; + } + + return true; + } + + +}; + +void count_varyings_per_interp(exec_list *lst, unsigned &none_interp, unsigned &flat_interp, + unsigned &smooth_interp, unsigned &noperspective_interp) +{ + none_interp = 0, flat_interp = 0, smooth_interp = 0, noperspective_interp = 0; + + foreach_list_const(node, lst) { + const varying_info *vi = (class varying_info *) node; + switch (vi->produced->interpolation) { + case INTERP_QUALIFIER_NONE: + none_interp ++; + break; + case INTERP_QUALIFIER_SMOOTH: + smooth_interp ++; + break; + case INTERP_QUALIFIER_FLAT: + flat_interp ++; + break; + case INTERP_QUALIFIER_NOPERSPECTIVE: + noperspective_interp ++; + break; + } + } +} + +void populate_varyings_per_interp(exec_list *lst, buffer_representation &none_interp, buffer_representation &flat_interp, + buffer_representation &smooth_interp,buffer_representation &noperspective_interp) +{ + foreach_list(node, lst) { + varying_info *vi = (class varying_info *) node; + switch (vi->produced->interpolation) { + case INTERP_QUALIFIER_NONE: + none_interp.insert(vi->produced, vi->consumed); + break; + case INTERP_QUALIFIER_SMOOTH: + smooth_interp.insert(vi->produced, vi->consumed); + break; + case INTERP_QUALIFIER_FLAT: + flat_interp.insert(vi->produced, vi->consumed); + break; + case INTERP_QUALIFIER_NOPERSPECTIVE: + noperspective_interp.insert(vi->produced, vi->consumed); + break; + } + } +} + +} + + + +/** + * These function packs varyings according to ES Spec recommandation. + * It will trigger "linker_error" if packing fails. + * This function is conservative, it does not try to pack varyings with different interpolation + */ +bool +varying_pack_no_mixed(exec_list *lst, gl_shader_program *prog, unsigned max_varyings) +{ + // Heuristic : we allocate regs for each interpolation type wrt number of variables using them. + unsigned none_interp_count, flat_interp_count, smooth_interp_count, noperspective_interp_count; + + pack::count_varyings_per_interp(lst, none_interp_count, flat_interp_count, smooth_interp_count, noperspective_interp_count); + + unsigned total = none_interp_count + smooth_interp_count + flat_interp_count + noperspective_interp_count; + if (!total) + return true; + + unsigned none_regs = (max_varyings * none_interp_count ) / total; + unsigned flat_regs = (max_varyings * flat_interp_count ) / total; + unsigned smooth_regs = (max_varyings * smooth_interp_count ) / total; + unsigned noperspective_regs = (max_varyings * noperspective_interp_count ) / total; + + pack::buffer_representation none_interp(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, none_regs); + pack::buffer_representation flat_interp(VERT_RESULT_VAR0 + none_regs, FRAG_ATTRIB_VAR0 + none_regs, flat_regs); + pack::buffer_representation smooth_interp(VERT_RESULT_VAR0 + none_regs + flat_regs, FRAG_ATTRIB_VAR0 + none_regs + flat_regs, smooth_regs); + pack::buffer_representation noperspective_interp(VERT_RESULT_VAR0 + none_regs + flat_regs + smooth_regs, + FRAG_ATTRIB_VAR0 + none_regs + flat_regs + smooth_regs, + noperspective_regs); + + pack::populate_varyings_per_interp(lst, none_interp, flat_interp, smooth_interp, none_interp); + + if (!none_interp.try_pack(prog)) + return false; + if (!flat_interp.try_pack(prog)) + return false; + if (!smooth_interp.try_pack(prog)) + return false; + if (!noperspective_interp.try_pack(prog)) + return false; + + return true; +} + +bool +varying_pack_smooth_and_nopersp_mixed(exec_list *lst, gl_shader_program *prog, unsigned max_varyings) +{ + // Heuristic : we allocate regs for each interpolation type wrt number of variables using them. + unsigned flat_interp_count, mixed_interp_count; + + pack::count_varyings_per_interp(lst, mixed_interp_count, flat_interp_count, mixed_interp_count, mixed_interp_count); + + unsigned total = flat_interp_count + mixed_interp_count; + if (!total) + return true; + + unsigned flat_regs = (max_varyings * flat_interp_count ) / total; + unsigned mixed_interp_regs = (max_varyings * mixed_interp_count ) / total; + + pack::buffer_representation flat_interp(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, flat_regs); + pack::buffer_representation mixed_interp(VERT_RESULT_VAR0 + flat_regs, FRAG_ATTRIB_VAR0 + flat_regs, mixed_interp_regs); + + pack::populate_varyings_per_interp(lst, mixed_interp, flat_interp, mixed_interp, mixed_interp); + + if (!flat_interp.try_pack(prog)) + return false; + if (!mixed_interp.try_pack(prog)) + return false; + + return true; +} + +bool +varying_pack_no_constraints(exec_list *lst, gl_shader_program *prog, unsigned max_varyings) +{ + // Heuristic : we allocate regs for each interpolation type wrt number of variables using them. + unsigned total; + + pack::count_varyings_per_interp(lst, total, total, total, total); + + if (!total) + return true; + + pack::buffer_representation varying_buffer(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, max_varyings); + + pack::populate_varyings_per_interp(lst, varying_buffer, varying_buffer, varying_buffer, varying_buffer); + + if (!varying_buffer.try_pack(prog)) + return false; + + return true; +} + +// Fallback packing function bool varying_pack_avoid(exec_list *lst, gl_shader_program *prog, unsigned max_varyings) { @@ -1931,6 +2437,14 @@ assign_varying_locations(struct gl_context *ctx, bool (* varying_pack)(exec_list *, gl_shader_program *, unsigned); switch (ctx->ShaderCompilerOptions[0].VaryingsPackingConstraint) { + case PACKING_CONSTRAINT_NO_MIXED_INTERPOLATION: + varying_pack = varying_pack_no_mixed; + break; + case PACKING_CONSTRAINT_SMOOTH_NOPERSPECTIVE_MIXED: + varying_pack = varying_pack_smooth_and_nopersp_mixed; + break; + case PACKING_CONSTRAINT_NONE: + varying_pack = varying_pack_no_constraints; default: varying_pack = varying_pack_avoid; break; -- 1.7.7 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev