nouveau supports PIPE_SHADER_CAP_SUBROUTINES and properly details with RET opcodes. The alternative is that the st lowers the whole thing into a loop which adds IMHO unnecessary complexity to the resulting code. Any reason not to leave that in place?
-ilia On Mon, Oct 17, 2016 at 9:39 AM, Marek Olšák <mar...@gmail.com> wrote: > From: Marek Olšák <marek.ol...@amd.com> > > Never used. The GLSL compiler doesn't even look at EmitNoFunctions. > --- > src/mesa/state_tracker/st_extensions.c | 6 +- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 212 > +---------------------------- > 2 files changed, 7 insertions(+), 211 deletions(-) > > diff --git a/src/mesa/state_tracker/st_extensions.c > b/src/mesa/state_tracker/st_extensions.c > index b87a3db..13b7ae4 100644 > --- a/src/mesa/state_tracker/st_extensions.c > +++ b/src/mesa/state_tracker/st_extensions.c > @@ -265,24 +265,22 @@ void st_init_limits(struct pipe_screen *screen, > > options->EmitNoNoise = TRUE; > > /* TODO: make these more fine-grained if anyone needs it */ > options->MaxIfDepth = > screen->get_shader_param(screen, sh, > PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH); > options->EmitNoLoops = > !screen->get_shader_param(screen, sh, > PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH); > - options->EmitNoFunctions = > - !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUBROUTINES); > - options->EmitNoMainReturn = > - !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUBROUTINES); > + options->EmitNoFunctions = true; > + options->EmitNoMainReturn = true; > > options->EmitNoCont = > !screen->get_shader_param(screen, sh, > PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED); > > options->EmitNoIndirectInput = > !screen->get_shader_param(screen, sh, > PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR); > options->EmitNoIndirectOutput = > !screen->get_shader_param(screen, sh, > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 2ae15c9..293654c 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -284,21 +284,20 @@ public: > unsigned sampler_base:5; > unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if > not array */ > unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */ > glsl_base_type tex_type:4; > unsigned tex_shadow:1; > unsigned image_format:9; > unsigned tex_offset_num_offset:3; > unsigned dead_mask:4; /**< Used in dead code elimination */ > unsigned buffer_access:3; /**< buffer access type */ > > - class function_entry *function; /* Set on TGSI_OPCODE_CAL or > TGSI_OPCODE_BGNSUB */ > const struct tgsi_opcode_info *info; > }; > > class variable_storage : public exec_node { > public: > variable_storage(ir_variable *var, gl_register_file file, int index, > unsigned array_id = 0) > : file(file), index(index), component(0), var(var), array_id(array_id) > { > assert(file != PROGRAM_ARRAY || array_id != 0); > @@ -324,52 +323,20 @@ public: > this->size32 = size32; > this->type = type; > } > > /* doubles are stored across 2 gl_constant_values */ > gl_constant_value values[4]; > int size32; /**< Number of 32-bit components (1-4) */ > int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ > }; > > -class function_entry : public exec_node { > -public: > - ir_function_signature *sig; > - > - /** > - * identifier of this function signature used by the program. > - * > - * At the point that TGSI instructions for function calls are > - * generated, we don't know the address of the first instruction of > - * the function body. So we make the BranchTarget that is called a > - * small integer and rewrite them during set_branchtargets(). > - */ > - int sig_id; > - > - /** > - * Pointer to first instruction of the function body. > - * > - * Set during function body emits after main() is processed. > - */ > - glsl_to_tgsi_instruction *bgn_inst; > - > - /** > - * Index of the first instruction of the function body in actual TGSI. > - * > - * Set after conversion from glsl_to_tgsi_instruction to TGSI. > - */ > - int inst; > - > - /** Storage for the return value. */ > - st_src_reg return_reg; > -}; > - > static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, > GLSL_TYPE_ERROR); > static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, > GLSL_TYPE_ERROR); > > struct inout_decl { > unsigned mesa_index; > unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */ > unsigned size; > enum glsl_base_type base_type; > ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */ > }; > @@ -404,22 +371,20 @@ find_array_type(struct inout_decl *decls, unsigned > count, unsigned array_id) > struct rename_reg_pair { > int old_reg; > int new_reg; > }; > > struct glsl_to_tgsi_visitor : public ir_visitor { > public: > glsl_to_tgsi_visitor(); > ~glsl_to_tgsi_visitor(); > > - function_entry *current_function; > - > struct gl_context *ctx; > struct gl_program *prog; > struct gl_shader_program *shader_program; > struct gl_linked_shader *shader; > struct gl_shader_compiler_options *options; > > int next_temp; > > unsigned *array_sizes; > unsigned max_num_arrays; > @@ -447,22 +412,20 @@ public: > bool native_integers; > bool have_sqrt; > bool have_fma; > bool use_shared_memory; > > variable_storage *find_variable_storage(ir_variable *var); > > int add_constant(gl_register_file file, gl_constant_value values[8], > int size, int datatype, uint16_t *swizzle_out); > > - function_entry *get_function_signature(ir_function_signature *sig); > - > st_src_reg get_temp(const glsl_type *type); > void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int > *num_reladdr); > > st_src_reg st_src_reg_for_double(double val); > st_src_reg st_src_reg_for_float(float val); > st_src_reg st_src_reg_for_int(int val); > st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val); > > /** > * \name Visit methods > @@ -504,24 +467,20 @@ public: > > st_src_reg result; > > /** List of variable_storage */ > exec_list variables; > > /** List of immediate_storage */ > exec_list immediates; > unsigned num_immediates; > > - /** List of function_entry */ > - exec_list function_signatures; > - int next_signature_id; > - > /** List of glsl_to_tgsi_instruction */ > exec_list instructions; > > glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op, > st_dst_reg dst = undef_dst, > st_src_reg src0 = undef_src, > st_src_reg src1 = undef_src, > st_src_reg src2 = undef_src, > st_src_reg src3 = undef_src); > > @@ -712,22 +671,20 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, > unsigned op, > inst->src[2] = src2; > inst->src[3] = src3; > inst->is_64bit_expanded = false; > inst->ir = ir; > inst->dead_mask = 0; > /* default to float, for paths where this is not initialized > * (since 0==UINT which is likely wrong): > */ > inst->tex_type = GLSL_TYPE_FLOAT; > > - inst->function = NULL; > - > /* Update indirect addressing status used by TGSI */ > if (dst.reladdr || dst.reladdr2) { > switch(dst.file) { > case PROGRAM_STATE_VAR: > case PROGRAM_CONSTANT: > case PROGRAM_UNIFORM: > this->indirect_addr_consts = true; > break; > case PROGRAM_IMMEDIATE: > assert(!"immediates should not have indirect addressing"); > @@ -3181,56 +3138,20 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) > } > > this->result = st_src_reg(file, -1, ir->type); > this->result.index = add_constant(file, > values, > ir->type->vector_elements, > gl_type, > &this->result.swizzle); > } > > -function_entry * > -glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) > -{ > - foreach_in_list_use_after(function_entry, entry, > &this->function_signatures) { > - if (entry->sig == sig) > - return entry; > - } > - > - entry = ralloc(mem_ctx, function_entry); > - entry->sig = sig; > - entry->sig_id = this->next_signature_id++; > - entry->bgn_inst = NULL; > - > - /* Allocate storage for all the parameters. */ > - foreach_in_list(ir_variable, param, &sig->parameters) { > - variable_storage *storage; > - > - storage = find_variable_storage(param); > - assert(!storage); > - > - st_src_reg src = get_temp(param->type); > - > - storage = new(mem_ctx) variable_storage(param, src.file, src.index); > - this->variables.push_tail(storage); > - } > - > - if (!sig->return_type->is_void()) { > - entry->return_reg = get_temp(sig->return_type); > - } else { > - entry->return_reg = undef_src; > - } > - > - this->function_signatures.push_tail(entry); > - return entry; > -} > - > void > glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) > { > exec_node *param = ir->actual_parameters.get_head(); > ir_dereference *deref = static_cast<ir_dereference *>(param); > ir_variable *location = deref->variable_referenced(); > > st_src_reg buffer( > PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT); > > @@ -3727,30 +3648,24 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call > *ir) > inst->buffer_access |= TGSI_MEMORY_COHERENT; > if (imgvar->data.image_restrict) > inst->buffer_access |= TGSI_MEMORY_RESTRICT; > if (imgvar->data.image_volatile) > inst->buffer_access |= TGSI_MEMORY_VOLATILE; > } > > void > glsl_to_tgsi_visitor::visit(ir_call *ir) > { > - glsl_to_tgsi_instruction *call_inst; > ir_function_signature *sig = ir->callee; > - function_entry *entry; > - int i; > > /* Filter out intrinsics */ > switch (sig->intrinsic_id) { > - case ir_intrinsic_invalid: > - break; > - > case ir_intrinsic_atomic_counter_read: > case ir_intrinsic_atomic_counter_increment: > case ir_intrinsic_atomic_counter_predecrement: > case ir_intrinsic_atomic_counter_add: > case ir_intrinsic_atomic_counter_min: > case ir_intrinsic_atomic_counter_max: > case ir_intrinsic_atomic_counter_and: > case ir_intrinsic_atomic_counter_or: > case ir_intrinsic_atomic_counter_xor: > case ir_intrinsic_atomic_counter_exchange: > @@ -3801,98 +3716,34 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) > case ir_intrinsic_image_atomic_and: > case ir_intrinsic_image_atomic_or: > case ir_intrinsic_image_atomic_xor: > case ir_intrinsic_image_atomic_exchange: > case ir_intrinsic_image_atomic_comp_swap: > case ir_intrinsic_image_size: > case ir_intrinsic_image_samples: > visit_image_intrinsic(ir); > return; > > + case ir_intrinsic_invalid: > case ir_intrinsic_generic_load: > case ir_intrinsic_generic_store: > case ir_intrinsic_generic_atomic_add: > case ir_intrinsic_generic_atomic_and: > case ir_intrinsic_generic_atomic_or: > case ir_intrinsic_generic_atomic_xor: > case ir_intrinsic_generic_atomic_min: > case ir_intrinsic_generic_atomic_max: > case ir_intrinsic_generic_atomic_exchange: > case ir_intrinsic_generic_atomic_comp_swap: > case ir_intrinsic_shader_clock: > unreachable("Invalid intrinsic"); > } > - > - entry = get_function_signature(sig); > - /* Process in parameters. */ > - foreach_two_lists(formal_node, &sig->parameters, > - actual_node, &ir->actual_parameters) { > - ir_rvalue *param_rval = (ir_rvalue *) actual_node; > - ir_variable *param = (ir_variable *) formal_node; > - > - if (param->data.mode == ir_var_function_in || > - param->data.mode == ir_var_function_inout) { > - variable_storage *storage = find_variable_storage(param); > - assert(storage); > - > - param_rval->accept(this); > - st_src_reg r = this->result; > - > - st_dst_reg l; > - l.file = storage->file; > - l.index = storage->index; > - l.reladdr = NULL; > - l.writemask = WRITEMASK_XYZW; > - > - for (i = 0; i < type_size(param->type); i++) { > - emit_asm(ir, TGSI_OPCODE_MOV, l, r); > - l.index++; > - r.index++; > - } > - } > - } > - > - /* Emit call instruction */ > - call_inst = emit_asm(ir, TGSI_OPCODE_CAL); > - call_inst->function = entry; > - > - /* Process out parameters. */ > - foreach_two_lists(formal_node, &sig->parameters, > - actual_node, &ir->actual_parameters) { > - ir_rvalue *param_rval = (ir_rvalue *) actual_node; > - ir_variable *param = (ir_variable *) formal_node; > - > - if (param->data.mode == ir_var_function_out || > - param->data.mode == ir_var_function_inout) { > - variable_storage *storage = find_variable_storage(param); > - assert(storage); > - > - st_src_reg r; > - r.file = storage->file; > - r.index = storage->index; > - r.reladdr = NULL; > - r.swizzle = SWIZZLE_NOOP; > - r.negate = 0; > - > - param_rval->accept(this); > - st_dst_reg l = st_dst_reg(this->result); > - > - for (i = 0; i < type_size(param->type); i++) { > - emit_asm(ir, TGSI_OPCODE_MOV, l, r); > - l.index++; > - r.index++; > - } > - } > - } > - > - /* Process return value. */ > - this->result = entry->return_reg; > } > > void > glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail, > unsigned *array_elements, > uint16_t *index, > st_src_reg *indirect, > unsigned *location) > { > switch (tail->ir_type) { > @@ -4318,39 +4169,21 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) > } > > inst->tex_type = ir->type->base_type; > > this->result = result_src; > } > > void > glsl_to_tgsi_visitor::visit(ir_return *ir) > { > - if (ir->get_value()) { > - st_dst_reg l; > - int i; > - > - assert(current_function); > - > - ir->get_value()->accept(this); > - st_src_reg r = this->result; > - > - l = st_dst_reg(current_function->return_reg); > - > - for (i = 0; i < type_size(current_function->sig->return_type); i++) { > - emit_asm(ir, TGSI_OPCODE_MOV, l, r); > - l.index++; > - r.index++; > - } > - } > - > - emit_asm(ir, TGSI_OPCODE_RET); > + assert(0); > } > > void > glsl_to_tgsi_visitor::visit(ir_discard *ir) > { > if (ir->condition) { > ir->condition->accept(this); > st_src_reg condition = this->result; > > /* Convert the bool condition to a float so we can negate. */ > @@ -4428,23 +4261,21 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() > > result.file = PROGRAM_UNDEFINED; > next_temp = 1; > array_sizes = NULL; > max_num_arrays = 0; > next_array = 0; > num_inputs = 0; > num_outputs = 0; > num_input_arrays = 0; > num_output_arrays = 0; > - next_signature_id = 1; > num_immediates = 0; > - current_function = NULL; > num_address_regs = 0; > samplers_used = 0; > buffers_used = 0; > images_used = 0; > indirect_addr_consts = false; > wpos_transform_const = -1; > glsl_version = 0; > native_integers = false; > mem_ctx = ralloc_context(NULL); > ctx = NULL; > @@ -4577,25 +4408,22 @@ glsl_to_tgsi_visitor::simplify_cmp(void) > > memset(outputWrites, 0, sizeof(outputWrites)); > > foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { > unsigned prevWriteMask = 0; > > /* Give up if we encounter relative addressing or flow control. */ > if (inst->dst[0].reladdr || inst->dst[0].reladdr2 || > inst->dst[1].reladdr || inst->dst[1].reladdr2 || > tgsi_get_opcode_info(inst->op)->is_branch || > - inst->op == TGSI_OPCODE_BGNSUB || > inst->op == TGSI_OPCODE_CONT || > - inst->op == TGSI_OPCODE_END || > - inst->op == TGSI_OPCODE_ENDSUB || > - inst->op == TGSI_OPCODE_RET) { > + inst->op == TGSI_OPCODE_END) { > break; > } > > if (inst->dst[0].file == PROGRAM_OUTPUT) { > assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites)); > prevWriteMask = outputWrites[inst->dst[0].index]; > outputWrites[inst->dst[0].index] |= inst->dst[0].writemask; > } else if (inst->dst[0].file == PROGRAM_TEMPORARY) { > if (inst->dst[0].index >= tempWritesSize) { > const int inc = 4096; > @@ -5305,21 +5133,21 @@ struct st_translate { > struct ureg_src shared_memory; > unsigned *array_sizes; > struct inout_decl *input_decls; > unsigned num_input_decls; > struct inout_decl *output_decls; > unsigned num_output_decls; > > const GLuint *inputMapping; > const GLuint *outputMapping; > > - /* For every instruction that contains a label (eg CALL), keep > + /* For every instruction that contains a label, keep > * details so that we can go back afterwards and emit the correct > * tgsi instruction number for each label. > */ > struct label *labels; > unsigned labels_size; > unsigned labels_count; > > /* Keep a record of the tgsi instruction number that each mesa > * instruction starts at, will be used to fix up labels after > * translation. > @@ -5737,31 +5565,29 @@ compile_tgsi_instruction(struct st_translate *t, > for (i = 0; i < num_dst; i++) > dst[i] = translate_dst(t, > &inst->dst[i], > inst->saturate); > > for (i = 0; i < num_src; i++) > src[i] = translate_src(t, &inst->src[i]); > > switch(inst->op) { > case TGSI_OPCODE_BGNLOOP: > - case TGSI_OPCODE_CAL: > case TGSI_OPCODE_ELSE: > case TGSI_OPCODE_ENDLOOP: > case TGSI_OPCODE_IF: > case TGSI_OPCODE_UIF: > assert(num_dst == 0); > ureg_label_insn(ureg, > inst->op, > src, num_src, > - get_label(t, > - inst->op == TGSI_OPCODE_CAL ? > inst->function->sig_id : 0)); > + get_label(t, 0)); > return; > > case TGSI_OPCODE_TEX: > case TGSI_OPCODE_TXB: > case TGSI_OPCODE_TXD: > case TGSI_OPCODE_TXL: > case TGSI_OPCODE_TXP: > case TGSI_OPCODE_TXQ: > case TGSI_OPCODE_TXQS: > case TGSI_OPCODE_TXF: > @@ -6557,21 +6383,20 @@ out: > * generating Mesa IR. > */ > static struct gl_program * > get_mesa_program_tgsi(struct gl_context *ctx, > struct gl_shader_program *shader_program, > struct gl_linked_shader *shader) > { > glsl_to_tgsi_visitor* v; > struct gl_program *prog; > GLenum target = _mesa_shader_stage_to_program(shader->Stage); > - bool progress; > struct gl_shader_compiler_options *options = > &ctx->Const.ShaderCompilerOptions[shader->Stage]; > struct pipe_screen *pscreen = ctx->st->pipe->screen; > enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage); > > validate_ir_tree(shader->ir); > > prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); > if (!prog) > return NULL; > @@ -6593,47 +6418,20 @@ get_mesa_program_tgsi(struct gl_context *ctx, > _mesa_copy_linked_program_data(shader->Stage, shader_program, prog); > _mesa_generate_parameters_list_for_uniforms(shader_program, shader, > prog->Parameters); > > /* Remove reads from output registers. */ > lower_output_reads(shader->Stage, shader->ir); > > /* Emit intermediate IR for main(). */ > visit_exec_list(shader->ir, v); > > - /* Now emit bodies for any functions that were used. */ > - do { > - progress = GL_FALSE; > - > - foreach_in_list(function_entry, entry, &v->function_signatures) { > - if (!entry->bgn_inst) { > - v->current_function = entry; > - > - entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB); > - entry->bgn_inst->function = entry; > - > - visit_exec_list(&entry->sig->body, v); > - > - glsl_to_tgsi_instruction *last; > - last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); > - if (last->op != TGSI_OPCODE_RET) > - v->emit_asm(NULL, TGSI_OPCODE_RET); > - > - glsl_to_tgsi_instruction *end; > - end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB); > - end->function = entry; > - > - progress = GL_TRUE; > - } > - } > - } while (progress); > - > #if 0 > /* Print out some information (for debugging purposes) used by the > * optimization passes. */ > { > int i; > int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); > int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); > int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp); > int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp); > > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev