From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 70 ++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 19 deletions(-)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 47725f2..db8ebdf 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -60,33 +60,48 @@ (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) #define MAX_GLSL_TEXTURE_OFFSET 4 class st_src_reg; class st_dst_reg; static int swizzle_for_size(int size); +static int swizzle_for_type(const glsl_type *type, int component = 0) +{ + unsigned num_elements = 4; + + if (type) { + type = type->without_array(); + if (type->is_scalar() || type->is_vector() || type->is_matrix()) + num_elements = type->vector_elements; + } + + int swizzle = swizzle_for_size(num_elements); + assert(num_elements + component <= 4); + + swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1); + return swizzle; +} + /** * This struct is a corresponding struct to TGSI ureg_src. */ class st_src_reg { public: - st_src_reg(gl_register_file file, int index, const glsl_type *type) + st_src_reg(gl_register_file file, int index, const glsl_type *type, + int component = 0) { this->file = file; this->index = index; - if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) - this->swizzle = swizzle_for_size(type->vector_elements); - else - this->swizzle = SWIZZLE_XYZW; + this->swizzle = swizzle_for_type(type, component); this->negate = 0; this->index2D = 0; this->type = type ? type->base_type : GLSL_TYPE_ERROR; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; this->double_reg2 = false; this->array_id = 0; this->is_double_vertex_input = false; } @@ -272,27 +287,33 @@ public: unsigned buffer_access; /**< buffer access type */ class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ const struct tgsi_opcode_info *info; }; class variable_storage : public exec_node { public: variable_storage(ir_variable *var, gl_register_file file, int index, unsigned array_id = 0) - : file(file), index(index), var(var), array_id(array_id) + : file(file), index(index), component(0), var(var), array_id(array_id) { /* empty */ } gl_register_file file; int index; + + /* Explicit component location. This is given in terms of the GLSL-style + * swizzles where each double is a single component, i.e. for 64-bit types + * it can only be 0 or 1. + */ + int component; ir_variable *var; /* variable that maps to this, if any */ unsigned array_id; }; class immediate_storage : public exec_node { public: immediate_storage(gl_constant_value *values, int size32, int type) { memcpy(this->values, values, size32 * sizeof(gl_constant_value)); this->size32 = size32; @@ -2380,76 +2401,88 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) case ir_var_shader_in: { /* The linker assigns locations for varyings and attributes, * including deprecated builtins (like gl_Color), user-assign * generic attributes (glBindVertexLocation), and * user-defined varyings. */ assert(var->data.location != -1); const glsl_type *type_without_array = var->type->without_array(); struct inout_decl *decl = &inputs[num_inputs]; + unsigned component = var->data.location_frac; num_inputs++; + if (type_without_array->is_64bit()) + component = component / 2; + decl->mesa_index = var->data.location; decl->base_type = type_without_array->base_type; - decl->usage_mask = u_bit_consecutive(0, + decl->usage_mask = u_bit_consecutive(component, type_without_array->vector_elements); if (is_inout_array(shader->Stage, var, &is_2d)) { decl->array_id = num_input_arrays + 1; if (is_2d) { decl->array_size = type_size(var->type->fields.array); } else { decl->array_size = type_size(var->type); } num_input_arrays++; } else { decl->array_id = 0; decl->array_size = 0; } entry = new(mem_ctx) variable_storage(var, PROGRAM_INPUT, decl->mesa_index, decl->array_id); + entry->component = component; + this->variables.push_tail(entry); break; } case ir_var_shader_out: { assert(var->data.location != -1); const glsl_type *type_without_array = var->type->without_array(); struct inout_decl *decl = &outputs[num_outputs]; + unsigned component = var->data.location_frac; num_outputs++; + if (type_without_array->is_64bit()) + component = component / 2; + decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index; decl->base_type = type_without_array->base_type; - decl->usage_mask = u_bit_consecutive(0, + decl->usage_mask = u_bit_consecutive(component, type_without_array->vector_elements); if (is_inout_array(shader->Stage, var, &is_2d)) { decl->array_id = num_output_arrays + 1; if (is_2d) { decl->array_size = type_size(var->type->fields.array); } else { decl->array_size = type_size(var->type); } num_output_arrays++; } else { decl->array_id = 0; decl->array_size = 0; } entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT, decl->mesa_index, decl->array_id); + entry->component = component; + this->variables.push_tail(entry); break; } case ir_var_system_value: entry = new(mem_ctx) variable_storage(var, PROGRAM_SYSTEM_VALUE, var->data.location); break; case ir_var_auto: case ir_var_temporary: @@ -2460,21 +2493,21 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) break; } if (!entry) { printf("Failed to make storage for %s\n", var->name); exit(1); } } - this->result = st_src_reg(entry->file, entry->index, var->type); + this->result = st_src_reg(entry->file, entry->index, var->type, entry->component); this->result.array_id = entry->array_id; if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double()) this->result.is_double_vertex_input = true; if (!native_integers) this->result.type = GLSL_TYPE_FLOAT; } static void shrink_array_declarations(struct inout_decl *decls, unsigned count, GLbitfield64* usage_mask, @@ -2624,26 +2657,20 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) src.reladdr2 = ralloc(mem_ctx, st_src_reg); memcpy(src.reladdr2, &index_reg, sizeof(index_reg)); src.index2D = 0; src.has_index2 = true; } else { src.reladdr = ralloc(mem_ctx, st_src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } } - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector()) - src.swizzle = swizzle_for_size(ir->type->vector_elements); - else - src.swizzle = SWIZZLE_NOOP; - /* Change the register type to the element type of the array. */ src.type = ir->type->base_type; this->result = src; } void glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) { unsigned int i; @@ -2667,36 +2694,38 @@ glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) this->result.index += offset; this->result.type = ir->type->base_type; } /** * We want to be careful in assignment setup to hit the actual storage * instead of potentially using a temporary like we might with the * ir_dereference handler. */ static st_dst_reg -get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) +get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component) { /* The LHS must be a dereference. If the LHS is a variable indexed array * access of a vector, it must be separated into a series conditional moves * before reaching this point (see ir_vec_index_to_cond_assign). */ assert(ir->as_dereference()); ir_dereference_array *deref_array = ir->as_dereference_array(); if (deref_array) { assert(!deref_array->array->type->is_vector()); } - /* Use the rvalue deref handler for the most part. We'll ignore - * swizzles in it and write swizzles using writemask, though. + /* Use the rvalue deref handler for the most part. We write swizzles using + * the writemask, but we do extract the base component for enhanced layouts + * from the source swizzle. */ ir->accept(v); + *component = GET_SWZ(v->result.swizzle, 0); return st_dst_reg(v->result); } /** * Process the condition of a conditional assignment * * Examines the condition of a conditional assignment to generate the optimal * first operand of a \c CMP instruction. If the condition is a relational * operator with 0 (e.g., \c ir_binop_less), the value being compared will be * used as the source for the \c CMP instruction. Otherwise the comparison @@ -2867,27 +2896,28 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * if (type->is_dual_slot()) { l->index++; if (r->is_double_vertex_input == false) r->index++; } } void glsl_to_tgsi_visitor::visit(ir_assignment *ir) { + int dst_component; st_dst_reg l; st_src_reg r; ir->rhs->accept(this); r = this->result; - l = get_assignment_lhs(ir->lhs, this); + l = get_assignment_lhs(ir->lhs, this, &dst_component); { int swizzles[4]; int first_enabled_chan = 0; int rhs_chan = 0; ir_variable *variable = ir->lhs->variable_referenced(); if (shader->Stage == MESA_SHADER_FRAGMENT && variable->data.mode == ir_var_shader_out && (variable->data.location == FRAG_RESULT_DEPTH || @@ -2914,20 +2944,22 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) l.writemask = ir->write_mask; } for (int i = 0; i < 4; i++) { if (l.writemask & (1 << i)) { first_enabled_chan = GET_SWZ(r.swizzle, i); break; } } + l.writemask = l.writemask << dst_component; + /* Swizzle a small RHS vector into the channels being written. * * glsl ir treats write_mask as dictating how many channels are * present on the RHS while TGSI treats write_mask as just * showing which channels of the vec4 RHS get written. */ for (int i = 0; i < 4; i++) { if (l.writemask & (1 << i)) swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); else -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev