--- src/glsl/linker.cpp | 659 ++++++++++++++++++++++++++++++++++++++++++--------- 1 files changed, 550 insertions(+), 109 deletions(-)
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 5095751..66c1671 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1458,6 +1458,8 @@ private: * variable. -1 if a location hasn't been assigned yet. */ int location; + + unsigned horizontal_location; /** * If location != -1, the number of vector elements in this variable, or 1 @@ -1496,6 +1498,7 @@ tfeedback_decl::init(struct gl_context *ctx, struct gl_shader_program *prog, */ this->location = -1; + this->horizontal_location = 0; this->orig_name = input; this->is_clip_distance_mesa = false; @@ -1602,6 +1605,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx, this->vector_elements = output_var->type->vector_elements; this->matrix_columns = output_var->type->matrix_columns; this->type = output_var->type->gl_type; + this->horizontal_location = output_var->horizontal_location; } /* From GL_EXT_transform_feedback: @@ -1687,7 +1691,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, for (unsigned v = 0; v < this->matrix_columns; ++v) { unsigned num_components = this->vector_elements; assert(info->NumOutputs < max_outputs); - info->Outputs[info->NumOutputs].ComponentOffset = 0; + info->Outputs[info->NumOutputs].ComponentOffset = this->horizontal_location; if (this->is_clip_distance_mesa) { if (this->is_subscripted) { num_components = 1; @@ -1809,6 +1813,456 @@ assign_varying_location(ir_variable *input_var, ir_variable *output_var, } } +namespace pack { + +/** + * Varyings packing can be seen as an instance of the bin packing problem. + * It is a NP hard problem in general. + * + * ES 2.0 specs gives (GLSL_ES_Specification_1.0.17-3.pdf, p111) an algorithm + * that specifies a minimum requirement for when a set of varyings must be + * supported. + * We almost follow the algorithm : the target architecture should be a + * 8x4 grid, we use a {number_of_available_reg}x4 grid. + */ +class buffer_representation +{ +protected: + class delayed_location_assignment : public exec_node + { + public: + ir_variable *producer; + ir_variable *consumer; + unsigned location; + unsigned horizontal_location; + delayed_location_assignment(ir_variable *p, ir_variable *c) + : producer(p), consumer(c) + { + } + + bool is_before(const delayed_location_assignment *comparee) const + { + unsigned local_size = (producer->type->is_array())?producer->type->length:1; + unsigned comparee_size = (comparee->producer->type->is_array())?comparee->producer->type->length:1; + return local_size >= comparee_size; + } + + void set_location_to_vars(unsigned producer_offset, unsigned consumer_offset, unsigned location, unsigned horizontal_location) + { + producer->location = producer_offset + location; + producer->horizontal_location = horizontal_location; + if (consumer) { + consumer->location = consumer_offset + location; + consumer->horizontal_location = horizontal_location; + } + } + + }; + + void *ctx; + unsigned number_of_regs; + bool * is_occupied; + unsigned occupied_space_in_column[4]; + unsigned producer_offset, consumer_offset; + exec_list *stored_vars[8]; + + + #define ARGMAX(i, j) (occupied_space_in_column[(i)] >= occupied_space_in_column[(j)])?(i):(j) + #define ARGMIN(i, j) (occupied_space_in_column[(i)] >= occupied_space_in_column[(j)])?(j):(i) + + /** + * Order index from most occupied column index to least occupied column index + */ + void order(unsigned reordered[4]) const + { + unsigned mx1 = ARGMAX(0, 1), mx2 = ARGMAX(2, 3); + unsigned mn1 = ARGMIN(0, 1), mn2 = ARGMIN(2, 3); + + reordered[0] = ARGMAX(mx1, mx2); + unsigned semi_max = ARGMIN(mx1, mx2); + reordered[3] = ARGMIN(mn1, mn2); + unsigned semi_min = ARGMAX(mn1, mn2); + reordered[1] = ARGMAX(semi_max, semi_min); + reordered[2] = ARGMIN(semi_min, semi_max); + } + + #undef ARGMAX + #undef ARGMIN + + INLINE + bool& is_occupied_ref(unsigned index, unsigned components) + { + assert (components < 4 && index < number_of_regs); + return is_occupied[4 * index + components]; + } + + bool is_range_free(unsigned reg, unsigned component_start, unsigned row_occupied, unsigned components_occupied) + { + + if (component_start + components_occupied - 1 > 3) + return false; + if (reg + row_occupied - 1 > number_of_regs - 1) + return false; + for (unsigned j = 0; j < row_occupied; j++) { + for (unsigned i = 0; i < components_occupied; i++) { + if (is_occupied_ref(reg + j, component_start + i)) + return false; + } + } + return true; + } + + void mark_as_occupied(unsigned location, unsigned horizontal_location, unsigned size, unsigned components_occupied) + { + for (unsigned j = 0; j < size; j++) { + for (unsigned i = 0; i < components_occupied; i++) { + is_occupied_ref(location + j, horizontal_location + i) = true; + occupied_space_in_column[horizontal_location + i] ++; + } + } + } + + void insert(delayed_location_assignment *dla, const glsl_type *const type) + { + if (type->is_array()) { + return insert(dla, type->fields.array); + } + + // MAT4 + if (type->is_matrix() && type->vector_elements == 4) { + insert_largest_first(stored_vars[1], dla); + return; + } + + // MAT2 + if (type->is_matrix() && type->vector_elements == 2) { + insert_largest_first(stored_vars[2], dla); + return; + } + + // {I,B,}VEC4 + if (type->is_vector() && type->vector_elements == 4) { + insert_largest_first(stored_vars[3], dla); + return; + } + + // MAT3 + if (type->is_matrix() && type->vector_elements == 3) { + insert_largest_first(stored_vars[4], dla); + return; + } + + // {I,B,}VEC3 + if (type->is_vector() && type->vector_elements == 3) { + insert_largest_first(stored_vars[5], dla); + return; + } + + // {I,B,}VEC2 + if (type->is_vector() && type->vector_elements == 2) { + insert_largest_first(stored_vars[6], dla); + return; + } + + // float, int + if (type->is_scalar()) { + insert_largest_first(stored_vars[7], dla); + return; + } + + assert(0 && "varying type not supported"); + return; + } + + void + insert_largest_first(exec_list *lst, delayed_location_assignment *dla) + { + if (lst->is_empty()) { + lst->push_tail(dla); + return; + } + foreach_list(node, lst) { + delayed_location_assignment *dla_from_lst = (class delayed_location_assignment *) node; + if (dla->is_before(dla_from_lst)) { + dla_from_lst->insert_before(dla); + return; + } + + } + } + + void space_occupied (const glsl_type *const type, unsigned &components, unsigned &size) + { + if (type->is_array()) { + space_occupied(type->fields.array, components, size); + size *= type->length; + return; + } + + if (type == glsl_type::mat2_type) { + components = 4; + size = 2; + return; + } + + components = type->vector_elements; + size = type->matrix_columns; + } + + /** + * "For 2,3 and 4 component variables packing is started using the 1st column of the 1st row. + * Variables are then allocated to successive rows, aligning them to the 1st column" + * (ES Spec) + */ + void phase1() + { + unsigned i, current_row = 0; + for (i = 1; i < 8; i++) { + foreach_list_safe (node, stored_vars[i]) { + delayed_location_assignment *dla = (class delayed_location_assignment *) node; + unsigned components, size; + space_occupied(dla->producer->type, components, size); + if (is_range_free(current_row, 0, size, components)) { + dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 0); + mark_as_occupied(current_row, 0, size, components); + dla->remove(); + current_row += size; + } else { + return; + } + } + } + } + + /** + * "For 2 component variables, when there are no spare rows, the strategy is switched to using the + * highest numbered row and the lowest numbered column where the variable will fit. " + * (ES Spec) + */ + void phase2() + { + int current_row = number_of_regs - 1; + foreach_list_safe (node, stored_vars[6]) { + delayed_location_assignment *dla = (class delayed_location_assignment *) node; + unsigned components, size; + space_occupied(dla->producer->type, components, size); + while (current_row >= 0) { + if (is_range_free(current_row, 0, size, 2)) { + dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 0); + mark_as_occupied(current_row, 0, size, 2); + dla->remove(); + current_row -= size; + break; + } else if (is_range_free(current_row, 2, size, 2)) { + dla->set_location_to_vars(producer_offset, consumer_offset, current_row, 2); + mark_as_occupied(current_row, 2, size, 2); + dla->remove(); + current_row -= size; + break; + } else { + current_row --; + } + } + if (current_row < 0) + return; + } + } + + /** + * "[1 Component variables] are packed in order of size, largest first. + * Each variable is placed in the column that leaves the least + * amount of space in the column and aligned to the lowest available rows within that column." + * (ES Spec) + */ + void phase3() + { + foreach_list_safe (node, stored_vars[7]) { + delayed_location_assignment *dla = (class delayed_location_assignment *) node; + unsigned components, size; + space_occupied(dla->producer->type, components, size); + unsigned reordered_index[4]; + // If we are here, no more space in first column + order(reordered_index); + for (unsigned i = 0; i < 4; i++) { + unsigned free_space = number_of_regs - occupied_space_in_column[reordered_index[i]]; + if (size > free_space) + continue; + else { + // Find first free row ; empty space is ensured to be contiguous + unsigned first_free_row = 0; + while (!is_range_free(first_free_row,reordered_index[i],1,1)) + first_free_row ++ ; + dla->set_location_to_vars(producer_offset, consumer_offset, first_free_row, reordered_index[i]); + mark_as_occupied(first_free_row, reordered_index[i], size, 1); + dla->remove(); + break; + } + } + } + } + + static void report_error(gl_shader_program *prog, exec_list *failing_varyings) + { + foreach_list_const (node, failing_varyings) { + const delayed_location_assignment *dla = (delayed_location_assignment *) node; + linker_error(prog, "Fails to find sufficient resources for varying %s\n", + dla->producer->name); + } + } + +public: + buffer_representation(unsigned p_offset, unsigned c_offset, unsigned n) + : ctx(ralloc_context(NULL)), number_of_regs(n), producer_offset(p_offset), consumer_offset(c_offset) + { + is_occupied = rzalloc_array(ctx, bool, 4 * number_of_regs); + for (unsigned i = 0; i < 8; i++) { + stored_vars[i] = new (ctx) exec_list(); + } + memset(occupied_space_in_column, 0, sizeof(occupied_space_in_column)); + } + + ~buffer_representation() + { + ralloc_free(ctx); + } + + void insert(ir_variable *producer, ir_variable *consumer) + { + assert(producer); + if (consumer) + assert(producer->type == consumer->type); + + delayed_location_assignment *dla = new (ctx) delayed_location_assignment(producer, consumer); + insert(dla, producer->type); + } + + // Note : We need prog to report errors + bool try_pack(gl_shader_program *prog) + { + phase1(); + if (!stored_vars[1]->is_empty() || !stored_vars[2]->is_empty() || !stored_vars[3]->is_empty() || + !stored_vars[4]->is_empty() || !stored_vars[5]->is_empty()) { + + /** + * "For 2 component variables, when there are no spare rows, ... + * Packing of any further 3 or 4 component variables will fail at this point." + * (ES Spec) + */ + report_error(prog, stored_vars[1]); + report_error(prog, stored_vars[2]); + report_error(prog, stored_vars[3]); + report_error(prog, stored_vars[3]); + report_error(prog, stored_vars[4]); + report_error(prog, stored_vars[5]); + return false; + + } + + phase2(); + if (!stored_vars[6]->is_empty()) { + report_error(prog, stored_vars[6]); + return false; + } + phase3(); + if (!stored_vars[7]->is_empty()) { + report_error(prog, stored_vars[7]); + return false; + } + + return true; + } + + +}; + +} + +class varying_info : public exec_node +{ +public: + ir_variable *produced; + ir_variable *consumed; + varying_info( ir_variable *p, ir_variable *c) + : produced(p), consumed(c) + { + + } +}; + +/** + * This function packs varyings according to ES Spec recommandation. + * It will trigger "linker_error" if packing fails. + * This function is conservative, it does not try to pack varyings with different interpolation + */ +bool +varying_pack(exec_list *lst, gl_shader_program *prog, unsigned max_varyings) +{ + // Heuristic : we allocate regs for each interpolation type wrt number of variables using them. + unsigned none_interp_count = 0, flat_interp_count = 0, smooth_interp_count = 0, noperspective_interp_count = 0; + + foreach_list_const(node, lst) { + const varying_info *vi = (class varying_info *) node; + switch (vi->produced->interpolation) { + case INTERP_QUALIFIER_NONE: + none_interp_count ++; + break; + case INTERP_QUALIFIER_SMOOTH: + smooth_interp_count ++; + break; + case INTERP_QUALIFIER_FLAT: + flat_interp_count ++; + break; + case INTERP_QUALIFIER_NOPERSPECTIVE: + noperspective_interp_count ++; + break; + } + } + + unsigned total = none_interp_count + smooth_interp_count + flat_interp_count + noperspective_interp_count; + if (!total) + return true; + + unsigned none_regs = (max_varyings * none_interp_count ) / total; + unsigned flat_regs = (max_varyings * flat_interp_count ) / total; + unsigned smooth_regs = (max_varyings * smooth_interp_count ) / total; + unsigned noperspective_regs = (max_varyings * noperspective_interp_count ) / total; + + pack::buffer_representation none_interp(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, none_regs); + pack::buffer_representation flat_interp(VERT_RESULT_VAR0 + none_regs, FRAG_ATTRIB_VAR0 + none_regs, flat_regs); + pack::buffer_representation smooth_interp(VERT_RESULT_VAR0 + none_regs + flat_regs, FRAG_ATTRIB_VAR0 + none_regs + flat_regs, smooth_regs); + pack::buffer_representation noperspective_interp(VERT_RESULT_VAR0 + none_regs + flat_regs + smooth_regs, + FRAG_ATTRIB_VAR0 + none_regs + flat_regs + smooth_regs, + noperspective_regs); + + foreach_list(node, lst) { + varying_info *vi = (class varying_info *) node; + switch (vi->produced->interpolation) { + case INTERP_QUALIFIER_NONE: + none_interp.insert(vi->produced, vi->consumed); + break; + case INTERP_QUALIFIER_SMOOTH: + smooth_interp.insert(vi->produced, vi->consumed); + break; + case INTERP_QUALIFIER_FLAT: + flat_interp.insert(vi->produced, vi->consumed); + break; + case INTERP_QUALIFIER_NOPERSPECTIVE: + noperspective_interp.insert(vi->produced, vi->consumed); + break; + } + } + + if (!none_interp.try_pack(prog)) + return false; + if (!flat_interp.try_pack(prog)) + return false; + if (!smooth_interp.try_pack(prog)) + return false; + if (!noperspective_interp.try_pack(prog)) + return false; + + return true; +} /** * Assign locations for all variables that are produced in one pipeline stage @@ -1832,15 +2286,14 @@ assign_varying_location(ir_variable *input_var, ir_variable *output_var, */ bool assign_varying_locations(struct gl_context *ctx, - struct gl_shader_program *prog, - gl_shader *producer, gl_shader *consumer, + struct gl_shader_program *prog, + gl_shader *producer, gl_shader *consumer, unsigned num_tfeedback_decls, tfeedback_decl *tfeedback_decls) { - /* FINISHME: Set dynamically when geometry shader support is added. */ - unsigned output_index = VERT_RESULT_VAR0; - unsigned input_index = FRAG_ATTRIB_VAR0; - + void *rctx = ralloc_context(NULL); + exec_list *delayed_assignment = new (rctx) exec_list(); + /* Operate in a total of three passes. * * 1. Assign locations for any matching inputs and outputs. @@ -1851,115 +2304,103 @@ assign_varying_locations(struct gl_context *ctx, * 3. Mark input variables in the consumer that do not have locations as * not being inputs. This lets the optimizer eliminate them. */ - + link_invalidate_variable_locations(producer, ir_var_out, VERT_RESULT_VAR0); if (consumer) - link_invalidate_variable_locations(consumer, ir_var_in, FRAG_ATTRIB_VAR0); - + link_invalidate_variable_locations(consumer, ir_var_in, FRAG_ATTRIB_VAR0); + foreach_list(node, producer->ir) { - ir_variable *const output_var = ((ir_instruction *) node)->as_variable(); - - if ((output_var == NULL) || (output_var->mode != ir_var_out)) - continue; - - ir_variable *input_var = - consumer ? consumer->symbols->get_variable(output_var->name) : NULL; - - if (input_var && input_var->mode != ir_var_in) - input_var = NULL; - - if (input_var) { - assign_varying_location(input_var, output_var, &input_index, - &output_index); - } - - for (unsigned i = 0; i < num_tfeedback_decls; ++i) { - if (!tfeedback_decls[i].is_assigned() && - tfeedback_decls[i].matches_var(output_var)) { - if (output_var->location == -1) { - assign_varying_location(input_var, output_var, &input_index, - &output_index); - } - if (!tfeedback_decls[i].assign_location(ctx, prog, output_var)) - return false; - } - } + ir_variable *const output_var = ((ir_instruction *) node)->as_variable(); + + if ((output_var == NULL) || (output_var->mode != ir_var_out)) + continue; + + ir_variable *input_var = + consumer ? consumer->symbols->get_variable(output_var->name) : NULL; + + if (input_var && input_var->mode != ir_var_in) + input_var = NULL; + + varying_info *vi = NULL; + + if (input_var) { + vi = new (rctx) varying_info (output_var, input_var); + delayed_assignment->push_tail(vi); + } + + for (unsigned i = 0; i < num_tfeedback_decls; ++i) { + if (!tfeedback_decls[i].is_assigned() && + tfeedback_decls[i].matches_var(output_var)) { + if (output_var->location != -1) { + if (!tfeedback_decls[i].assign_location(ctx, prog, output_var)) { + ralloc_free(rctx); + return false; + } + } else if (!vi) { + vi = new (rctx) varying_info (output_var, NULL); + delayed_assignment->push_tail(vi); + } + } + } } - unsigned varying_vectors = 0; - - if (consumer) { - foreach_list(node, consumer->ir) { - ir_variable *const var = ((ir_instruction *) node)->as_variable(); - - if ((var == NULL) || (var->mode != ir_var_in)) - continue; - - if (var->location == -1) { - if (prog->Version <= 120) { - /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: - * - * Only those varying variables used (i.e. read) in - * the fragment shader executable must be written to - * by the vertex shader executable; declaring - * superfluous varying variables in a vertex shader is - * permissible. - * - * We interpret this text as meaning that the VS must - * write the variable for the FS to read it. See - * "glsl1-varying read but not written" in piglit. - */ - - linker_error(prog, "fragment shader varying %s not written " - "by vertex shader\n.", var->name); - } - - /* An 'in' variable is only really a shader input if its - * value is written by the previous stage. - */ - var->mode = ir_var_auto; - } else { - /* The packing rules are used for vertex shader inputs are also - * used for fragment shader inputs. - */ - varying_vectors += count_attribute_slots(var->type); - } - } + // Should be driver dependant function + if (!varying_pack(delayed_assignment, prog, ctx->Const.MaxVarying)) { + ralloc_free(rctx); + return false; } - - if (ctx->API == API_OPENGLES2 || prog->Version == 100) { - if (varying_vectors > ctx->Const.MaxVarying) { - if (ctx->Const.GLSLSkipStrictMaxVaryingLimitCheck) { - linker_warning(prog, "shader uses too many varying vectors " - "(%u > %u), but the driver will try to optimize " - "them out; this is non-portable out-of-spec " - "behavior\n", - varying_vectors, ctx->Const.MaxVarying); - } else { - linker_error(prog, "shader uses too many varying vectors " - "(%u > %u)\n", - varying_vectors, ctx->Const.MaxVarying); - return false; - } - } - } else { - const unsigned float_components = varying_vectors * 4; - if (float_components > ctx->Const.MaxVarying * 4) { - if (ctx->Const.GLSLSkipStrictMaxVaryingLimitCheck) { - linker_warning(prog, "shader uses too many varying components " - "(%u > %u), but the driver will try to optimize " - "them out; this is non-portable out-of-spec " - "behavior\n", - float_components, ctx->Const.MaxVarying * 4); - } else { - linker_error(prog, "shader uses too many varying components " - "(%u > %u)\n", - float_components, ctx->Const.MaxVarying * 4); - return false; - } - } + + // Do TFB assignments + foreach_list(node, delayed_assignment) { + varying_info *vi = (varying_info *) node; + for (unsigned i = 0; i < num_tfeedback_decls; ++i) { + printf("name is %s, at %d,%d\n", vi->produced->name, vi->produced->location, vi->produced->horizontal_location); + printf("assigned is %d\n",tfeedback_decls[i].matches_var(vi->produced)); + if (!tfeedback_decls[i].is_assigned() && + tfeedback_decls[i].matches_var(vi->produced)) { + if (!tfeedback_decls[i].assign_location(ctx, prog, vi->produced)) { + ralloc_free(rctx); + return false; + } + } + } + } + //linker_error(prog, "protect"); + + if (consumer) { + foreach_list(node, consumer->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != ir_var_in)) + continue; + + if (var->location == -1) { + if (prog->Version <= 120) { + /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: + * + * Only those varying variables used (i.e. read) in + * the fragment shader executable must be written to + * by the vertex shader executable; declaring + * superfluous varying variables in a vertex shader is + * permissible. + * + * We interpret this text as meaning that the VS must + * write the variable for the FS to read it. See + * "glsl1-varying read but not written" in piglit. + */ + + linker_error(prog, "fragment shader varying %s not written " + "by vertex shader\n.", var->name); + } + + /* An 'in' variable is only really a shader input if its + * value is written by the previous stage. + */ + var->mode = ir_var_auto; + } + } } - + ralloc_free(rctx); return true; } -- 1.7.7 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev