glsl_to_tgsi: adjust swizzles and writemasks for explicit components

Nicolai Hähnle Fri, 07 Oct 2016 12:56:54 -0700

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 70 ++++++++++++++++++++++--------
 1 file changed, 51 insertions(+), 19 deletions(-)


diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 47725f2..db8ebdf 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -60,33 +60,48 @@
                            (1 << PROGRAM_CONSTANT) |     \
                            (1 << PROGRAM_UNIFORM))
 
 #define MAX_GLSL_TEXTURE_OFFSET 4
 
 class st_src_reg;
 class st_dst_reg;
 
 static int swizzle_for_size(int size);
 
+static int swizzle_for_type(const glsl_type *type, int component = 0)
+{
+   unsigned num_elements = 4; /* kept if type is NULL or not scalar/vector/matrix */
+
+   if (type) {
+      type = type->without_array(); /* size by the element type of arrays */
+      if (type->is_scalar() || type->is_vector() || type->is_matrix())
+         num_elements = type->vector_elements;
+   }
+
+   int swizzle = swizzle_for_size(num_elements);
+   assert(num_elements + component <= 4); /* offset channels must fit in a vec4 */
+
+   swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1); /* presumably bumps each selector by component - confirm vs. macro */
+   return swizzle;
+}
+
 /**
  * This struct is a corresponding struct to TGSI ureg_src.
  */
 class st_src_reg {
 public:
-   st_src_reg(gl_register_file file, int index, const glsl_type *type)
+   st_src_reg(gl_register_file file, int index, const glsl_type *type,
+              int component = 0)
    {
       this->file = file;
       this->index = index;
-      if (type && (type->is_scalar() || type->is_vector() || 
type->is_matrix()))
-         this->swizzle = swizzle_for_size(type->vector_elements);
-      else
-         this->swizzle = SWIZZLE_XYZW;
+      this->swizzle = swizzle_for_type(type, component); /* sized for type, offset by component */
       this->negate = 0;
       this->index2D = 0;
       this->type = type ? type->base_type : GLSL_TYPE_ERROR;
       this->reladdr = NULL;
       this->reladdr2 = NULL;
       this->has_index2 = false;
       this->double_reg2 = false;
       this->array_id = 0;
       this->is_double_vertex_input = false;
    }
@@ -272,27 +287,33 @@ public:
    unsigned buffer_access; /**< buffer access type */
 
    class function_entry *function; /* Set on TGSI_OPCODE_CAL or 
TGSI_OPCODE_BGNSUB */
    const struct tgsi_opcode_info *info;
 };
 
 class variable_storage : public exec_node {
 public:
    variable_storage(ir_variable *var, gl_register_file file, int index,
                     unsigned array_id = 0)
-      : file(file), index(index), var(var), array_id(array_id)
+      : file(file), index(index), component(0), var(var), array_id(array_id)
    {
       /* empty */
    }
 
    gl_register_file file;
    int index;
+
+   /* Explicit component location; 0 unless set after construction. Given in
+    * terms of the GLSL-style swizzles where each double is a single
+    * component, i.e. for 64-bit types it can only be 0 or 1.
+    */
+   int component;
    ir_variable *var; /* variable that maps to this, if any */
    unsigned array_id;
 };
 
 class immediate_storage : public exec_node {
 public:
    immediate_storage(gl_constant_value *values, int size32, int type)
    {
       memcpy(this->values, values, size32 * sizeof(gl_constant_value));
       this->size32 = size32;
@@ -2380,76 +2401,88 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
       case ir_var_shader_in: {
          /* The linker assigns locations for varyings and attributes,
           * including deprecated builtins (like gl_Color), user-assign
           * generic attributes (glBindVertexLocation), and
           * user-defined varyings.
           */
          assert(var->data.location != -1);
 
          const glsl_type *type_without_array = var->type->without_array();
          struct inout_decl *decl = &inputs[num_inputs];
+         unsigned component = var->data.location_frac;
          num_inputs++;
 
+         if (type_without_array->is_64bit())
+            component = component / 2;
+
          decl->mesa_index = var->data.location;
          decl->base_type = type_without_array->base_type;
-         decl->usage_mask = u_bit_consecutive(0,
+         decl->usage_mask = u_bit_consecutive(component,
                                               
type_without_array->vector_elements);
 
          if (is_inout_array(shader->Stage, var, &is_2d)) {
             decl->array_id = num_input_arrays + 1;
             if (is_2d) {
                decl->array_size = type_size(var->type->fields.array);
             } else {
                decl->array_size = type_size(var->type);
             }
             num_input_arrays++;
          } else {
             decl->array_id = 0;
             decl->array_size = 0;
          }
 
          entry = new(mem_ctx) variable_storage(var,
                                                PROGRAM_INPUT,
                                                decl->mesa_index,
                                                decl->array_id);
+         entry->component = component;
+
          this->variables.push_tail(entry);
          break;
       }
       case ir_var_shader_out: {
          assert(var->data.location != -1);
 
          const glsl_type *type_without_array = var->type->without_array();
          struct inout_decl *decl = &outputs[num_outputs];
+         unsigned component = var->data.location_frac;
          num_outputs++;
 
+         if (type_without_array->is_64bit())
+            component = component / 2;
+
          decl->mesa_index = var->data.location + FRAG_RESULT_MAX * 
var->data.index;
          decl->base_type = type_without_array->base_type;
-         decl->usage_mask = u_bit_consecutive(0,
+         decl->usage_mask = u_bit_consecutive(component,
                                               
type_without_array->vector_elements);
 
          if (is_inout_array(shader->Stage, var, &is_2d)) {
             decl->array_id = num_output_arrays + 1;
             if (is_2d) {
                decl->array_size = type_size(var->type->fields.array);
             } else {
                decl->array_size = type_size(var->type);
             }
             num_output_arrays++;
          } else {
             decl->array_id = 0;
             decl->array_size = 0;
          }
 
          entry = new(mem_ctx) variable_storage(var,
                                                PROGRAM_OUTPUT,
                                                decl->mesa_index,
                                                decl->array_id);
+         entry->component = component;
+
          this->variables.push_tail(entry);
          break;
       }
       case ir_var_system_value:
          entry = new(mem_ctx) variable_storage(var,
                                                PROGRAM_SYSTEM_VALUE,
                                                var->data.location);
          break;
       case ir_var_auto:
       case ir_var_temporary:
@@ -2460,21 +2493,21 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
 
          break;
       }
 
       if (!entry) {
          printf("Failed to make storage for %s\n", var->name);
          exit(1);
       }
    }
 
-   this->result = st_src_reg(entry->file, entry->index, var->type);
+   this->result = st_src_reg(entry->file, entry->index, var->type, 
entry->component);
    this->result.array_id = entry->array_id;
    if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == 
ir_var_shader_in && var->type->is_double())
       this->result.is_double_vertex_input = true;
    if (!native_integers)
       this->result.type = GLSL_TYPE_FLOAT;
 }
 
 static void
 shrink_array_declarations(struct inout_decl *decls, unsigned count,
                           GLbitfield64* usage_mask,
@@ -2624,26 +2657,20 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
          src.reladdr2 = ralloc(mem_ctx, st_src_reg);
          memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
          src.index2D = 0;
          src.has_index2 = true;
       } else {
          src.reladdr = ralloc(mem_ctx, st_src_reg);
          memcpy(src.reladdr, &index_reg, sizeof(index_reg));
       }
    }
 
-   /* If the type is smaller than a vec4, replicate the last channel out. */
-   if (ir->type->is_scalar() || ir->type->is_vector())
-      src.swizzle = swizzle_for_size(ir->type->vector_elements);
-   else
-      src.swizzle = SWIZZLE_NOOP;
-
    /* Change the register type to the element type of the array. */
    src.type = ir->type->base_type;
 
    this->result = src;
 }
 
 void
 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
 {
    unsigned int i;
@@ -2667,36 +2694,38 @@ glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
    this->result.index += offset;
    this->result.type = ir->type->base_type;
 }
 
 /**
  * We want to be careful in assignment setup to hit the actual storage
  * instead of potentially using a temporary like we might with the
  * ir_dereference handler.
  */
 static st_dst_reg
-get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
+get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component)
 {
    /* The LHS must be a dereference.  If the LHS is a variable indexed array
     * access of a vector, it must be separated into a series conditional moves
     * before reaching this point (see ir_vec_index_to_cond_assign).
     */
    assert(ir->as_dereference());
    ir_dereference_array *deref_array = ir->as_dereference_array();
    if (deref_array) {
       assert(!deref_array->array->type->is_vector());
    }
 
-   /* Use the rvalue deref handler for the most part.  We'll ignore
-    * swizzles in it and write swizzles using writemask, though.
+   /* Use the rvalue deref handler for the most part.  We write swizzles using
+    * the writemask, but we do extract the base component for enhanced layouts
+    * from the source swizzle.
     */
    ir->accept(v);
+   *component = GET_SWZ(v->result.swizzle, 0); /* out-param: base component, read from swizzle slot 0 */
    return st_dst_reg(v->result);
 }
 
 /**
  * Process the condition of a conditional assignment
  *
  * Examines the condition of a conditional assignment to generate the optimal
  * first operand of a \c CMP instruction.  If the condition is a relational
  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
  * used as the source for the \c CMP instruction.  Otherwise the comparison
@@ -2867,27 +2896,28 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *
    if (type->is_dual_slot()) {
       l->index++;
       if (r->is_double_vertex_input == false)
         r->index++;
    }
 }
 
 void
 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
 {
+   int dst_component;
    st_dst_reg l;
    st_src_reg r;
 
    ir->rhs->accept(this);
    r = this->result;
 
-   l = get_assignment_lhs(ir->lhs, this);
+   l = get_assignment_lhs(ir->lhs, this, &dst_component);
 
    {
       int swizzles[4];
       int first_enabled_chan = 0;
       int rhs_chan = 0;
       ir_variable *variable = ir->lhs->variable_referenced();
 
       if (shader->Stage == MESA_SHADER_FRAGMENT &&
           variable->data.mode == ir_var_shader_out &&
           (variable->data.location == FRAG_RESULT_DEPTH ||
@@ -2914,20 +2944,22 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
          l.writemask = ir->write_mask;
       }
 
       for (int i = 0; i < 4; i++) {
          if (l.writemask & (1 << i)) {
             first_enabled_chan = GET_SWZ(r.swizzle, i);
             break;
          }
       }
 
+      l.writemask = l.writemask << dst_component;
+
       /* Swizzle a small RHS vector into the channels being written.
        *
        * glsl ir treats write_mask as dictating how many channels are
        * present on the RHS while TGSI treats write_mask as just
        * showing which channels of the vec4 RHS get written.
        */
       for (int i = 0; i < 4; i++) {
          if (l.writemask & (1 << i))
             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
          else
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 11/13] st/glsl_to_tgsi: adjust swizzles and writemasks for explicit components

Reply via email to