---
 src/glsl/linker.cpp |  442 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 442 insertions(+), 0 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 4a5e3bb..8441854 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1815,6 +1815,369 @@ assign_varying_location(ir_variable *input_var, 
ir_variable *output_var,
 
 namespace pack {
 
+/**
+ * Varyings packing can be seen as an instance of the bin packing problem.
+ * It is a NP hard problem in general.
+ *
+ * ES 2.0 specs gives (GLSL_ES_Specification_1.0.17-3.pdf, p111) an algorithm
+ * that specifies a minimum requirement for when a set of varyings must be 
+ * supported.
+ * We almost follow the algorithm : the target architecture should be a
+ * 8x4 grid, we use a {number_of_available_reg}x4 grid.
+ */
+class buffer_representation
+{
+protected:
+   class delayed_location_assignment : public exec_node
+   {
+   public:
+       ir_variable *producer;
+       ir_variable *consumer;
+       unsigned location;
+       unsigned horizontal_location;
+       delayed_location_assignment(ir_variable *p, ir_variable *c)
+          : producer(p), consumer(c)
+       {
+       }
+       
+       bool is_before(const delayed_location_assignment *comparee) const
+       {
+          unsigned local_size = 
(producer->type->is_array())?producer->type->length:1;
+          unsigned comparee_size = 
(comparee->producer->type->is_array())?comparee->producer->type->length:1;
+          return local_size >= comparee_size;
+       }
+       
+       void set_location_to_vars(unsigned producer_offset, unsigned 
consumer_offset, unsigned location, unsigned horizontal_location)
+       {
+          producer->location = producer_offset + location;
+          producer->horizontal_location = horizontal_location;
+          if (consumer) {
+               consumer->location = consumer_offset + location;
+               consumer->horizontal_location = horizontal_location;
+          }
+       }
+       
+   };
+   
+   void *ctx;
+   unsigned number_of_regs;
+   bool * is_occupied;
+   unsigned occupied_space_in_column[4];
+   unsigned producer_offset, consumer_offset;
+   exec_list *stored_vars[8];
+   
+   
+   #define ARGMAX(i, j) (occupied_space_in_column[(i)] >= 
occupied_space_in_column[(j)])?(i):(j)
+   #define ARGMIN(i, j) (occupied_space_in_column[(i)] >= 
occupied_space_in_column[(j)])?(j):(i)
+   
+   /**
+    * Order index from most occupied column index to least occupied column 
index
+    */
+   void order(unsigned reordered[4]) const
+   {
+       unsigned mx1 = ARGMAX(0, 1), mx2 = ARGMAX(2, 3);
+       unsigned mn1 = ARGMIN(0, 1), mn2 = ARGMIN(2, 3);
+       
+       reordered[0] = ARGMAX(mx1, mx2);
+       unsigned semi_max = ARGMIN(mx1, mx2);
+       reordered[3] = ARGMIN(mn1, mn2);
+       unsigned semi_min = ARGMAX(mn1, mn2);
+       reordered[1] = ARGMAX(semi_max, semi_min);
+       reordered[2] = ARGMIN(semi_min, semi_max);
+   }
+   
+   #undef ARGMAX
+   #undef ARGMIN
+   
+   INLINE
+   bool& is_occupied_ref(unsigned index, unsigned components)
+   {
+       assert (components < 4 && index < number_of_regs);
+       return is_occupied[4 * index + components];
+   }
+   
+   bool is_range_free(unsigned reg, unsigned component_start, unsigned 
row_occupied, unsigned components_occupied)
+   {
+
+       if (component_start + components_occupied - 1 > 3)
+          return false;
+       if (reg + row_occupied - 1 > number_of_regs - 1)
+          return false;
+       for (unsigned j = 0; j < row_occupied; j++) {
+          for (unsigned i = 0; i < components_occupied; i++) {
+               if (is_occupied_ref(reg + j, component_start + i))
+                  return false;
+          }
+       }
+       return true;
+   }
+   
+   void mark_as_occupied(unsigned location, unsigned horizontal_location, 
unsigned size, unsigned components_occupied) 
+   {
+       for (unsigned j = 0; j < size; j++) {
+          for (unsigned i = 0; i < components_occupied; i++) {
+               is_occupied_ref(location + j, horizontal_location + i) = true;
+               occupied_space_in_column[horizontal_location + i] ++;
+          }
+       }
+   }
+   
+   void insert(delayed_location_assignment *dla, const glsl_type *const type)
+   {
+       if (type->is_array()) {
+          insert(dla, type->fields.array);
+       }
+       
+       // MAT4
+       if (type->is_matrix() && type->vector_elements == 4) {
+          insert_largest_first(stored_vars[1], dla);
+          return;
+       }
+       
+       // MAT2
+       if (type->is_matrix() && type->vector_elements == 2) {
+          insert_largest_first(stored_vars[2], dla);
+          return;
+       }
+       
+       // {I,B,}VEC4
+       if (type->is_vector() && type->vector_elements == 4) {
+          insert_largest_first(stored_vars[3], dla);
+          return;
+       }
+       
+       // MAT3
+       if (type->is_matrix() && type->vector_elements == 3) {
+          insert_largest_first(stored_vars[4], dla);
+          return;
+       }
+       
+       // {I,B,}VEC3
+       if (type->is_vector() && type->vector_elements == 3) {
+          insert_largest_first(stored_vars[5], dla);
+          return;
+       }
+       
+       // {I,B,}VEC2
+       if (type->is_vector() && type->vector_elements == 2) {
+          insert_largest_first(stored_vars[6], dla);
+          return;
+       }
+
+       // float, int   
+       if (type->is_scalar()) {
+          insert_largest_first(stored_vars[7], dla);
+          return;
+       }
+       
+       assert(0 && "varying type not supported");
+       return;
+   }
+   
+   void
+   insert_largest_first(exec_list *lst, delayed_location_assignment *dla)
+   {
+       if (lst->is_empty()) {
+          lst->push_tail(dla);
+          return;
+       }
+       foreach_list(node, lst) {
+          delayed_location_assignment *dla_from_lst = (class 
delayed_location_assignment *) node;
+          if (dla->is_before(dla_from_lst)) {
+               dla_from_lst->insert_before(dla);
+               return;
+          }
+          
+       }
+       lst->push_tail(dla);
+   }
+   
+   void space_occupied (const glsl_type *const type, unsigned &components, 
unsigned &size)
+   {
+       if (type->is_array()) {
+          space_occupied(type->fields.array, components, size);
+          size *= type->length;
+          return;
+       }
+       
+       if (type == glsl_type::mat2_type) {
+          components = 4;
+          size = 2;
+          return;
+       }
+       
+       components = type->vector_elements;
+       size = type->matrix_columns;
+   }
+   
+   /**
+    * "For 2,3 and 4 component variables packing is started using the 1st 
column of the 1st row.
+    *  Variables are then allocated to successive rows, aligning them to the 
1st column"
+    * (ES Spec)
+    */
+   void phase1() 
+   {
+       unsigned i, current_row = 0;
+       for (i = 1; i < 8; i++) {
+          foreach_list_safe (node, stored_vars[i]) {
+               delayed_location_assignment *dla = (class 
delayed_location_assignment *) node;
+               unsigned components, size;
+               space_occupied(dla->producer->type, components, size);
+               if (is_range_free(current_row, 0, size, components)) {
+                  dla->set_location_to_vars(producer_offset, consumer_offset, 
current_row, 0);
+                  mark_as_occupied(current_row, 0, size, components);
+                  dla->remove();
+                  current_row += size;
+               } else {
+                  return;
+               }
+          }
+       }
+   }
+   
+   /**
+    * "For 2 component variables, when there are no spare rows, the strategy 
is switched to using the
+    * highest numbered row and the lowest numbered column where the variable 
will fit. "
+    * (ES Spec)
+    */
+   void phase2()
+   {
+       int current_row = number_of_regs - 1;
+       foreach_list_safe (node, stored_vars[6]) {
+          delayed_location_assignment *dla = (class 
delayed_location_assignment *) node;
+          unsigned components, size;
+          space_occupied(dla->producer->type, components, size);
+          while (current_row >= 0) {
+               if (is_range_free(current_row, 0, size, 2)) {
+                  dla->set_location_to_vars(producer_offset, consumer_offset, 
current_row, 0);
+                  mark_as_occupied(current_row, 0, size, 2);
+                  dla->remove();
+                  current_row -= size;
+                  break;
+               } else if (is_range_free(current_row, 2, size, 2)) {
+                  dla->set_location_to_vars(producer_offset, consumer_offset, 
current_row, 2);
+                  mark_as_occupied(current_row, 2, size, 2);
+                  dla->remove();
+                  current_row -= size;
+                  break;   
+               } else {
+                  current_row --;
+               }
+          }
+          if (current_row < 0)
+               return;
+       }
+   }
+   
+   /**
+    * "[1 Component variables] are packed in order of size, largest first. 
+    * Each variable is placed in the column that leaves the least
+    * amount of space in the column and aligned to the lowest available rows 
within that column."
+    * (ES Spec)
+    */
+   void phase3()
+   {
+       foreach_list_safe (node, stored_vars[7]) {
+          delayed_location_assignment *dla = (class 
delayed_location_assignment *) node;
+          unsigned components, size;
+          space_occupied(dla->producer->type, components, size);
+          unsigned reordered_index[4];
+          // If we are here, no more space in first column
+          order(reordered_index);
+          for (unsigned i = 0; i < 4; i++) {
+               unsigned free_space = number_of_regs - 
occupied_space_in_column[reordered_index[i]];
+               if (size > free_space)
+                  continue;
+               else {
+                  // Find first free row ; empty space is ensured to be 
contiguous
+                  unsigned first_free_row = 0;
+                  while (!is_range_free(first_free_row,reordered_index[i],1,1))
+                       first_free_row ++ ;
+                  dla->set_location_to_vars(producer_offset, consumer_offset, 
first_free_row, reordered_index[i]);
+                  mark_as_occupied(first_free_row, reordered_index[i], size, 
1);
+                  dla->remove();
+                  break;
+               }
+          }
+       }
+   }
+   
+   static void report_error(gl_shader_program *prog, exec_list 
*failing_varyings)
+   {
+       foreach_list_const (node, failing_varyings) {
+          const delayed_location_assignment *dla = 
(delayed_location_assignment *) node;
+          linker_error(prog, "Fails to find sufficient resources for varying 
%s\n",
+                       dla->producer->name);   
+       }
+   }
+   
+public:
+   buffer_representation(unsigned p_offset, unsigned c_offset, unsigned n)
+      : ctx(ralloc_context(NULL)), number_of_regs(n), 
producer_offset(p_offset), consumer_offset(c_offset)
+   {
+       is_occupied = rzalloc_array(ctx, bool, 4 * number_of_regs);
+       for (unsigned i = 0; i < 8; i++) {
+          stored_vars[i] = new (ctx) exec_list();
+       }
+       memset(occupied_space_in_column, 0, sizeof(occupied_space_in_column));
+   }
+   
+   ~buffer_representation()
+   {
+       ralloc_free(ctx);
+   }
+   
+   void insert(ir_variable *producer, ir_variable *consumer)
+   {
+       assert(producer);
+       if (consumer)
+          assert(producer->type == consumer->type);
+       
+       delayed_location_assignment *dla = new (ctx) 
delayed_location_assignment(producer, consumer);
+       insert(dla, producer->type);
+   }
+   
+   // Note : We need prog to report errors
+   bool try_pack(gl_shader_program *prog)
+   {
+       phase1();
+       if (!stored_vars[1]->is_empty() || !stored_vars[2]->is_empty() || 
!stored_vars[3]->is_empty() || 
+           !stored_vars[4]->is_empty() || !stored_vars[5]->is_empty()) {
+           
+          /**
+           * "For 2 component variables, when there are no spare rows, ...
+           * Packing of any further 3 or 4 component variables will fail at 
this point."
+           * (ES Spec)
+           */
+          report_error(prog, stored_vars[1]);
+          report_error(prog, stored_vars[2]);
+          report_error(prog, stored_vars[3]);
+          report_error(prog, stored_vars[3]);
+          report_error(prog, stored_vars[4]);
+          report_error(prog, stored_vars[5]);
+          return false;
+          
+       }
+       
+       phase2();
+       if (!stored_vars[6]->is_empty()) {
+          report_error(prog, stored_vars[6]);
+          return false;
+       }
+       phase3();
+       if (!stored_vars[7]->is_empty()) {
+          report_error(prog, stored_vars[7]);
+          return false;
+       }
+       
+       return true;
+   }
+   
+   
+};
+
+}
+
 class varying_info : public exec_node
 {
 public:
@@ -1827,6 +2190,81 @@ public:
    }
 };
 
+/**
+ * This function packs varyings according to ES Spec recommandation.
+ * It will trigger "linker_error" if packing fails.
+ * This function is conservative, it does not try to pack varyings with 
different interpolation
+ */
+bool
+varying_pack_no_mixed(exec_list *lst, gl_shader_program *prog, unsigned 
max_varyings)
+{
+   // Heuristic : we allocate regs for each interpolation type wrt number of 
variables using them.
+   unsigned none_interp_count = 0, flat_interp_count = 0, smooth_interp_count 
= 0, noperspective_interp_count = 0;
+   
+   foreach_list_const(node, lst) {
+       const varying_info *vi = (class varying_info *) node;
+       switch (vi->produced->interpolation) {
+       case INTERP_QUALIFIER_NONE:
+          none_interp_count ++;
+          break;
+       case INTERP_QUALIFIER_SMOOTH:
+          smooth_interp_count ++;
+          break;
+       case INTERP_QUALIFIER_FLAT:
+          flat_interp_count ++;
+          break;
+       case INTERP_QUALIFIER_NOPERSPECTIVE:
+          noperspective_interp_count ++;
+          break;
+       }
+   }
+   
+   unsigned total = none_interp_count + smooth_interp_count + 
flat_interp_count + noperspective_interp_count;
+   if (!total)
+       return true;
+   
+   unsigned none_regs = (max_varyings * none_interp_count ) / total;
+   unsigned flat_regs = (max_varyings * flat_interp_count ) / total;
+   unsigned smooth_regs = (max_varyings * smooth_interp_count ) / total;
+   unsigned noperspective_regs = (max_varyings * noperspective_interp_count ) 
/ total;
+
+   pack::buffer_representation none_interp(VERT_RESULT_VAR0, FRAG_ATTRIB_VAR0, 
none_regs);
+   pack::buffer_representation flat_interp(VERT_RESULT_VAR0 + none_regs, 
FRAG_ATTRIB_VAR0 + none_regs, flat_regs);
+   pack::buffer_representation smooth_interp(VERT_RESULT_VAR0 + none_regs + 
flat_regs, FRAG_ATTRIB_VAR0 + none_regs + flat_regs, smooth_regs);
+   pack::buffer_representation noperspective_interp(VERT_RESULT_VAR0 + 
none_regs + flat_regs + smooth_regs,
+                                                    FRAG_ATTRIB_VAR0 + 
none_regs + flat_regs + smooth_regs,
+                                                    noperspective_regs);
+   
+   foreach_list(node, lst) {
+       varying_info *vi = (class varying_info *) node;
+       switch (vi->produced->interpolation) {
+       case INTERP_QUALIFIER_NONE:
+          none_interp.insert(vi->produced, vi->consumed);
+          break;
+       case INTERP_QUALIFIER_SMOOTH:
+          smooth_interp.insert(vi->produced, vi->consumed);
+          break;
+       case INTERP_QUALIFIER_FLAT:
+          flat_interp.insert(vi->produced, vi->consumed);
+          break;
+       case INTERP_QUALIFIER_NOPERSPECTIVE:
+          noperspective_interp.insert(vi->produced, vi->consumed);
+          break;
+       }
+   }
+   
+   if (!none_interp.try_pack(prog))
+       return false;
+   if (!flat_interp.try_pack(prog))
+       return false;
+   if (!smooth_interp.try_pack(prog))
+       return false;
+   if (!noperspective_interp.try_pack(prog))
+       return false;
+   
+   return true;
+}
+
 bool
 varying_pack_avoid(exec_list *lst, gl_shader_program *prog, unsigned 
max_varyings)
 {
@@ -1927,6 +2365,10 @@ assign_varying_locations(struct gl_context *ctx,
    bool (* varying_pack)(exec_list *, gl_shader_program *, unsigned);
 
    switch (ctx->ShaderCompilerOptions[0].VaryingsPackingConstraint) {
+   case NO_MIXED_INTERPOLATION:
+   case SMOOTH_NOPERSPECTIVE_MIXED:
+      varying_pack = varying_pack_no_mixed;
+      break;
    default:
       varying_pack = varying_pack_avoid;
       break;
-- 
1.7.7

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to