From: Nicolai Hähnle <nicolai.haeh...@amd.com> In order to be able to emit overlapping input and output array declarations, we flip the logic of emitting those declarations on its head: rather than iterating over slots and emitting the corresponding declarations, we iterate over the declarations from GLSL and emit those. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 277 +++++++++++++++-------------- 1 file changed, 144 insertions(+), 133 deletions(-)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index aac80ee..47725f2 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -333,39 +333,52 @@ public: */ int inst; /** Storage for the return value. */ st_src_reg return_reg; }; static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); -struct array_decl { +struct inout_decl { unsigned mesa_index; - unsigned array_id; + unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */ unsigned array_size; - enum glsl_base_type array_type; + enum glsl_base_type base_type; + ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */ }; -static enum glsl_base_type -find_array_type(struct array_decl *arrays, unsigned count, unsigned array_id) +static struct inout_decl * +find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id) { - unsigned i; + assert(array_id != 0); - for (i = 0; i < count; i++) { - struct array_decl *decl = &arrays[i]; + for (unsigned i = 0; i < count; i++) { + struct inout_decl *decl = &decls[i]; if (array_id == decl->array_id) { - return decl->array_type; + return decl; } } + + return NULL; +} + +static enum glsl_base_type +find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id) +{ + if (!array_id) + return GLSL_TYPE_ERROR; + struct inout_decl *decl = find_inout_array(decls, count, array_id); + if (decl) + return decl->base_type; return GLSL_TYPE_ERROR; } struct rename_reg_pair { int old_reg; int new_reg; }; struct glsl_to_tgsi_visitor : public ir_visitor { public: @@ -379,23 +392,25 @@ public: struct gl_shader_program *shader_program; struct gl_linked_shader *shader; struct gl_shader_compiler_options *options; int next_temp; unsigned *array_sizes; unsigned max_num_arrays; unsigned next_array; - struct array_decl 
input_arrays[PIPE_MAX_SHADER_INPUTS]; + struct inout_decl inputs[4 * PIPE_MAX_SHADER_INPUTS]; + unsigned num_inputs; unsigned num_input_arrays; - struct array_decl output_arrays[PIPE_MAX_SHADER_OUTPUTS]; + struct inout_decl outputs[4 * PIPE_MAX_SHADER_OUTPUTS]; + unsigned num_outputs; unsigned num_output_arrays; int num_address_regs; uint32_t samplers_used; glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ int buffers_used; int images_used; int image_targets[PIPE_MAX_SHADER_IMAGES]; unsigned image_formats[PIPE_MAX_SHADER_IMAGES]; @@ -729,21 +744,21 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, * changes to we pick the XY, ZW pairs from the correct index. * * GLSL [0].x -> TGSI [0].xy * GLSL [0].y -> TGSI [0].zw * GLSL [0].z -> TGSI [1].xy * GLSL [0].w -> TGSI [1].zw */ for (j = 0; j < 2; j++) { dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type); if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) { - enum glsl_base_type type = find_array_type(this->output_arrays, this->num_output_arrays, inst->dst[j].array_id); + enum glsl_base_type type = find_array_type(this->outputs, this->num_outputs, inst->dst[j].array_id); if (glsl_base_type_is_64bit(type)) dst_is_64bit[j] = true; } } if (dst_is_64bit[0] || dst_is_64bit[1] || glsl_base_type_is_64bit(inst->src[0].type)) { glsl_to_tgsi_instruction *dinst = NULL; int initial_src_swz[4], initial_src_idx[4]; int initial_dst_idx[2], initial_dst_writemask[2]; @@ -2355,85 +2370,89 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) ir_variable *var = ir->var; bool is_2d; if (!entry) { switch (var->data.mode) { case ir_var_uniform: entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, var->data.param_index); this->variables.push_tail(entry); break; - case ir_var_shader_in: + case ir_var_shader_in: { /* The linker assigns locations for varyings and attributes, * including 
deprecated builtins (like gl_Color), user-assign * generic attributes (glBindVertexLocation), and * user-defined varyings. */ assert(var->data.location != -1); - if (is_inout_array(shader->Stage, var, &is_2d)) { - struct array_decl *decl = &input_arrays[num_input_arrays]; + const glsl_type *type_without_array = var->type->without_array(); + struct inout_decl *decl = &inputs[num_inputs]; + num_inputs++; + + decl->mesa_index = var->data.location; + decl->base_type = type_without_array->base_type; + decl->usage_mask = u_bit_consecutive(0, + type_without_array->vector_elements); - decl->mesa_index = var->data.location; + if (is_inout_array(shader->Stage, var, &is_2d)) { decl->array_id = num_input_arrays + 1; if (is_2d) { decl->array_size = type_size(var->type->fields.array); - decl->array_type = var->type->fields.array->without_array()->base_type; } else { decl->array_size = type_size(var->type); - decl->array_type = var->type->without_array()->base_type; } num_input_arrays++; - - entry = new(mem_ctx) variable_storage(var, - PROGRAM_INPUT, - var->data.location, - decl->array_id); - } - else { - entry = new(mem_ctx) variable_storage(var, - PROGRAM_INPUT, - var->data.location); + } else { + decl->array_id = 0; + decl->array_size = 0; } + + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + decl->mesa_index, + decl->array_id); this->variables.push_tail(entry); break; - case ir_var_shader_out: + } + case ir_var_shader_out: { assert(var->data.location != -1); - if (is_inout_array(shader->Stage, var, &is_2d)) { - struct array_decl *decl = &output_arrays[num_output_arrays]; + const glsl_type *type_without_array = var->type->without_array(); + struct inout_decl *decl = &outputs[num_outputs]; + num_outputs++; + + decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index; + decl->base_type = type_without_array->base_type; + decl->usage_mask = u_bit_consecutive(0, + type_without_array->vector_elements); - decl->mesa_index = var->data.location; + if 
(is_inout_array(shader->Stage, var, &is_2d)) { decl->array_id = num_output_arrays + 1; if (is_2d) { decl->array_size = type_size(var->type->fields.array); - decl->array_type = var->type->fields.array->without_array()->base_type; } else { decl->array_size = type_size(var->type); - decl->array_type = var->type->without_array()->base_type; } num_output_arrays++; - - entry = new(mem_ctx) variable_storage(var, - PROGRAM_OUTPUT, - var->data.location, - decl->array_id); - } - else { - entry = new(mem_ctx) variable_storage(var, - PROGRAM_OUTPUT, - var->data.location - + FRAG_RESULT_MAX * - var->data.index); + } else { + decl->array_id = 0; + decl->array_size = 0; } + + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + decl->mesa_index, + decl->array_id); this->variables.push_tail(entry); break; + } case ir_var_system_value: entry = new(mem_ctx) variable_storage(var, PROGRAM_SYSTEM_VALUE, var->data.location); break; case ir_var_auto: case ir_var_temporary: st_src_reg src = get_temp(var->type); entry = new(mem_ctx) variable_storage(var, src.file, src.index); @@ -2450,33 +2469,35 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) this->result = st_src_reg(entry->file, entry->index, var->type); this->result.array_id = entry->array_id; if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double()) this->result.is_double_vertex_input = true; if (!native_integers) this->result.type = GLSL_TYPE_FLOAT; } static void -shrink_array_declarations(struct array_decl *arrays, unsigned count, +shrink_array_declarations(struct inout_decl *decls, unsigned count, GLbitfield64* usage_mask, GLbitfield64 double_usage_mask, GLbitfield* patch_usage_mask) { unsigned i; int j; /* Fix array declarations by removing unused array elements at both ends * of the arrays. For example, mat4[3] where only mat[1] is used. 
*/ for (i = 0; i < count; i++) { - struct array_decl *decl = &arrays[i]; + struct inout_decl *decl = &decls[i]; + if (!decl->array_id) + continue; /* Shrink the beginning. */ for (j = 0; j < (int)decl->array_size; j++) { if (decl->mesa_index >= VARYING_SLOT_PATCH0) { if (*patch_usage_mask & BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) break; } else { if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) @@ -4334,20 +4355,22 @@ glsl_to_tgsi_visitor::visit(ir_barrier *ir) glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() { STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS); result.file = PROGRAM_UNDEFINED; next_temp = 1; array_sizes = NULL; max_num_arrays = 0; next_array = 0; + num_inputs = 0; + num_outputs = 0; num_input_arrays = 0; num_output_arrays = 0; next_signature_id = 1; num_immediates = 0; current_function = NULL; num_address_regs = 0; samplers_used = 0; buffers_used = 0; images_used = 0; indirect_addr_consts = false; @@ -5206,22 +5229,24 @@ struct st_translate { struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[3]; struct ureg_src samplers[PIPE_MAX_SAMPLERS]; struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; struct ureg_src shared_memory; struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned *array_sizes; - struct array_decl *input_arrays; - struct array_decl *output_arrays; + struct inout_decl *input_decls; + unsigned num_input_decls; + struct inout_decl *output_decls; + unsigned num_output_decls; const GLuint *inputMapping; const GLuint *outputMapping; /* For every instruction that contains a label (eg CALL), keep * details so that we can go back afterwards and emit the correct * tgsi instruction number for each label. 
*/ struct label *labels; unsigned labels_size; @@ -5434,27 +5459,29 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index, t->procType == PIPE_SHADER_TESS_EVAL) assert(index < VARYING_SLOT_TESS_MAX); else assert(index < VARYING_SLOT_MAX); assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL); return t->outputs[t->outputMapping[index]]; } else { - struct array_decl *decl = &t->output_arrays[array_id-1]; + struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id); unsigned mesa_index = decl->mesa_index; int slot = t->outputMapping[mesa_index]; assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT); - assert(t->outputs[slot].ArrayID == array_id); - return ureg_dst_array_offset(t->outputs[slot], index - mesa_index); + + struct ureg_dst dst = t->outputs[slot]; + dst.ArrayID = array_id; + return ureg_dst_array_offset(dst, index - mesa_index); } case PROGRAM_ADDRESS: return t->address[index]; default: assert(!"unknown dst register file"); return ureg_dst_undef(); } } @@ -5497,27 +5524,29 @@ src_register(struct st_translate *t, const st_src_reg *reg) /* GLSL inputs are 64-bit containers, so we have to * map back to the original index and add the offset after * mapping. 
*/ index -= double_reg2; if (!reg->array_id) { assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL); return t->inputs[t->inputMapping[index] + double_reg2]; } else { - struct array_decl *decl = &t->input_arrays[reg->array_id-1]; + struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls, reg->array_id); unsigned mesa_index = decl->mesa_index; int slot = t->inputMapping[mesa_index]; assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT); - assert(t->inputs[slot].ArrayID == reg->array_id); - return ureg_src_array_offset(t->inputs[slot], index + double_reg2 - mesa_index); + + struct ureg_src src = t->inputs[slot]; + src.ArrayID = reg->array_id; + return ureg_src_array_offset(src, index + double_reg2 - mesa_index); } case PROGRAM_ADDRESS: return ureg_src(t->address[reg->index]); case PROGRAM_SYSTEM_VALUE: assert(reg->index < (int) ARRAY_SIZE(t->systemValues)); return t->systemValues[reg->index]; default: @@ -5998,39 +6027,20 @@ emit_face_var(struct gl_context *ctx, struct st_translate *t) } else { /* MOV_SAT face_temp, input[face] */ ureg_MOV(ureg, ureg_saturate(face_temp), face_input); } /* Use face_temp as face input from here on: */ t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp); } -static bool -find_array(unsigned attr, struct array_decl *arrays, unsigned count, - unsigned *array_id, unsigned *array_size) -{ - unsigned i; - - for (i = 0; i < count; i++) { - struct array_decl *decl = &arrays[i]; - - if (attr == decl->mesa_index) { - *array_id = decl->array_id; - *array_size = decl->array_size; - assert(*array_size); - return true; - } - } - return false; -} - static void emit_compute_block_size(const struct gl_program *program, struct ureg_program *ureg) { const struct gl_compute_program *cp = (const struct gl_compute_program *)program; ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, cp->LocalSize[0]); ureg_property(ureg, 
TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, cp->LocalSize[1]); @@ -6100,62 +6110,51 @@ st_translate_program( t->num_temp_arrays = program->next_array; if (t->num_temp_arrays) t->arrays = (struct ureg_dst*) calloc(t->num_temp_arrays, sizeof(t->arrays[0])); /* * Declare input attributes. */ switch (procType) { case PIPE_SHADER_FRAGMENT: - for (i = 0; i < numInputs; i++) { - unsigned array_id = 0; - unsigned array_size; - - if (find_array(inputSlotToAttr[i], program->input_arrays, - program->num_input_arrays, &array_id, &array_size)) { - /* We've found an array. Declare it so. */ - t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, - inputSemanticName[i], inputSemanticIndex[i], - interpMode[i], 0, interpLocation[i], - array_id, array_size); - - GLuint base_attr = inputSlotToAttr[i]; - while (i + 1 < numInputs && - inputSlotToAttr[i + 1] < base_attr + array_size) - ++i; - } - else { - t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, - inputSemanticName[i], inputSemanticIndex[i], - interpMode[i], 0, interpLocation[i], 0, 1); - } - } - break; case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_TESS_EVAL: case PIPE_SHADER_TESS_CTRL: - for (i = 0; i < numInputs; i++) { - unsigned array_id = 0; - unsigned array_size; - - if (find_array(inputSlotToAttr[i], program->input_arrays, - program->num_input_arrays, &array_id, &array_size)) { - /* We've found an array. Declare it so. */ - t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], - inputSemanticIndex[i], - array_id, array_size); - i += array_size - 1; + for (i = 0; i < program->num_inputs; ++i) { + struct inout_decl *decl = &program->inputs[i]; + unsigned slot = inputMapping[decl->mesa_index]; + struct ureg_src src; + ubyte tgsi_usage_mask = decl->usage_mask; + unsigned size = decl->array_id ? 
decl->array_size : 1; + + if (glsl_base_type_is_64bit(decl->base_type)) { + if (tgsi_usage_mask == 1) + tgsi_usage_mask = TGSI_WRITEMASK_XY; + else if (tgsi_usage_mask == 2) + tgsi_usage_mask = TGSI_WRITEMASK_ZW; + else { + if (!decl->array_id && (tgsi_usage_mask & TGSI_WRITEMASK_ZW)) + size = 2; + tgsi_usage_mask = TGSI_WRITEMASK_XYZW; + } } - else { - t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], - inputSemanticIndex[i], 0, 1); + + src = ureg_DECL_fs_input_cyl_centroid_layout(ureg, + inputSemanticName[slot], inputSemanticIndex[slot], + interpMode ? interpMode[slot] : 0, 0, interpLocation ? interpLocation[slot] : 0, + slot, decl->array_id, size, tgsi_usage_mask); + + if (t->inputs[slot].File != TGSI_FILE_INPUT) { + /* The ArrayID is set up in src_register */ + src.ArrayID = 0; + t->inputs[slot] = src; } } break; case PIPE_SHADER_VERTEX: for (i = 0; i < numInputs; i++) { t->inputs[i] = ureg_DECL_vs_input(ureg, i); } break; case PIPE_SHADER_COMPUTE: break; @@ -6167,37 +6166,47 @@ st_translate_program( * Declare output attributes. */ switch (procType) { case PIPE_SHADER_FRAGMENT: case PIPE_SHADER_COMPUTE: break; case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_TESS_EVAL: case PIPE_SHADER_TESS_CTRL: case PIPE_SHADER_VERTEX: - for (i = 0; i < numOutputs; i++) { - unsigned array_id = 0; - unsigned array_size; - - if (find_array(outputSlotToAttr[i], program->output_arrays, - program->num_output_arrays, &array_id, &array_size)) { - /* We've found an array. Declare it so. */ - t->outputs[i] = ureg_DECL_output_array(ureg, - outputSemanticName[i], - outputSemanticIndex[i], - array_id, array_size); - i += array_size - 1; + for (i = 0; i < program->num_outputs; ++i) { + struct inout_decl *decl = &program->outputs[i]; + unsigned slot = outputMapping[decl->mesa_index]; + struct ureg_dst dst; + ubyte tgsi_usage_mask = decl->usage_mask; + unsigned size = decl->array_id ?
decl->array_size : 1; + + if (glsl_base_type_is_64bit(decl->base_type)) { + if (tgsi_usage_mask == 1) + tgsi_usage_mask = TGSI_WRITEMASK_XY; + else if (tgsi_usage_mask == 2) + tgsi_usage_mask = TGSI_WRITEMASK_ZW; + else { + if (!decl->array_id && (tgsi_usage_mask & TGSI_WRITEMASK_ZW)) + size = 2; + tgsi_usage_mask = TGSI_WRITEMASK_XYZW; + } } - else { - t->outputs[i] = ureg_DECL_output(ureg, - outputSemanticName[i], - outputSemanticIndex[i]); + + dst = ureg_DECL_output_layout(ureg, + outputSemanticName[slot], outputSemanticIndex[slot], + slot, decl->array_id, size, tgsi_usage_mask); + + if (t->outputs[slot].File != TGSI_FILE_OUTPUT) { + /* The ArrayID is set up in dst_register */ + dst.ArrayID = 0; + t->outputs[slot] = dst; } } break; default: assert(0); } if (procType == PIPE_SHADER_FRAGMENT) { if (program->shader->info.EarlyFragmentTests) ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1); @@ -6306,22 +6315,24 @@ st_translate_program( semName == TGSI_SEMANTIC_POSITION) emit_wpos(st_context(ctx), t, proginfo, ureg, program->wpos_transform_const); sysInputs &= ~(1 << i); } } } t->array_sizes = program->array_sizes; - t->input_arrays = program->input_arrays; - t->output_arrays = program->output_arrays; + t->input_decls = program->inputs; + t->num_input_decls = program->num_inputs; + t->output_decls = program->outputs; + t->num_output_decls = program->num_outputs; /* Emit constants and uniforms. TGSI uses a single index space for these, * so we put all the translated regs in t->constants. 
*/ if (proginfo->Parameters) { t->constants = (struct ureg_src *) calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0])); if (t->constants == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; @@ -6639,23 +6650,23 @@ get_mesa_program_tgsi(struct gl_context *ctx, _mesa_shader_stage_to_string(shader->Stage), shader_program->Name); _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL); _mesa_log("\n\n"); } prog->Instructions = NULL; prog->NumInstructions = 0; do_set_program_inouts(shader->ir, prog, shader->Stage); - shrink_array_declarations(v->input_arrays, v->num_input_arrays, + shrink_array_declarations(v->inputs, v->num_inputs, &prog->InputsRead, prog->DoubleInputsRead, &prog->PatchInputsRead); - shrink_array_declarations(v->output_arrays, v->num_output_arrays, + shrink_array_declarations(v->outputs, v->num_outputs, &prog->OutputsWritten, 0ULL, &prog->PatchOutputsWritten); count_resources(v, prog); /* The GLSL IR won't be needed anymore. */ ralloc_free(shader->ir); shader->ir = NULL; /* This must be done before the uniform storage is associated. */ if (shader->Stage == MESA_SHADER_FRAGMENT && (prog->InputsRead & VARYING_BIT_POS || -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev