---
 src/mesa/state_tracker/st_atom_constbuf.c  |   14 +
 src/mesa/state_tracker/st_extensions.c     |   10 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  427 ++++++++++++++++++++++++----
 3 files changed, 405 insertions(+), 46 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index 05667a7..fb874fe 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -45,6 +45,8 @@
 #include "st_atom_constbuf.h"
 #include "st_program.h"
 
+#include "st_cb_bufferobjects.h"
+
 
 /**
  * Pass the given program parameters to the graphics pipe as a
@@ -55,6 +57,7 @@ void st_upload_constants( struct st_context *st,
                           struct gl_program_parameter_list *params,
                           unsigned shader_type)
 {
+   unsigned i;
    struct pipe_context *pipe = st->pipe;
 
    assert(shader_type == PIPE_SHADER_VERTEX ||
@@ -100,6 +103,17 @@ void st_upload_constants( struct st_context *st,
       st->state.constants[shader_type].size = 0;
       st->pipe->set_constant_buffer(st->pipe, shader_type, 0, NULL);
    }
+
+   /* Bind every uniform buffer object referenced by the parameter list.
+    * A binding point without a buffer object binds NULL instead of
+    * dereferencing a NULL st_buffer_object. */
+   for (i = 0; params && i < params->NumUBO; i++) {
+      struct st_buffer_object *buf =
+         st_buffer_object(st->ctx->UniformBufferObject.BindingPoint[i]);
+      st->pipe->set_constant_buffer(st->pipe, shader_type,
+                                    st->ctx->UniformBufferObject.UBOIndexes[i],
+                                    buf ? buf->buffer : NULL);
+   }
 }
 
 
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index c741d13..cc017c1 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -670,4 +670,14 @@ void st_init_extensions(struct st_context *st)
                                    PIPE_BIND_SAMPLER_VIEW))
       ctx->Extensions.ARB_texture_rgb10_a2ui = GL_TRUE;
 
+#if FEATURE_ARB_uniform_buffer_object
+   /* Expose UBOs only when every shader stage reports more than one
+    * constant buffer. */
+   if (screen->get_shader_param(screen,PIPE_SHADER_VERTEX,PIPE_SHADER_CAP_MAX_CONST_BUFFERS) > 1
+#if FEATURE_ARB_geometry_shader4
+       && screen->get_shader_param(screen,PIPE_SHADER_GEOMETRY,PIPE_SHADER_CAP_MAX_CONST_BUFFERS) > 1
+#endif
+       && screen->get_shader_param(screen,PIPE_SHADER_FRAGMENT,PIPE_SHADER_CAP_MAX_CONST_BUFFERS) > 1)
+      ctx->Extensions.ARB_uniform_buffer_object = GL_TRUE;
+#endif
 }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5e68b80..6c2cca8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -70,6 +70,8 @@ extern "C" {
 #include "st_mesa_to_tgsi.h"
 }
 
+#include "main/hash.h"
+
 #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
                            (1 << PROGRAM_ENV_PARAM) |    \
@@ -290,6 +292,217 @@ public:
    st_src_reg return_reg;
 };
 
+
+/** Maximum number of non-constant array subscripts tracked per dereference. */
+enum { MAX_STRIDED_EXPRESSIONS = 16 };
+
+/** A non-constant array subscript and the byte stride that scales it. */
+struct stride_information {
+   unsigned stride;
+   ir_rvalue* expr;
+};
+
+/**
+ * Walks a dereference chain.  When the chain is rooted at a UBO variable it
+ * records the buffer index, the constant byte offset of the accessed member
+ * and every non-constant array subscript together with its stride.
+ */
+class delayed_ubo_assign_detector : public ir_hierarchical_visitor {
+public:
+   unsigned ubo_index;
+   bool has_ubo_in_it;
+   union gl_variable_storage *subtype;
+   unsigned offset;
+   struct stride_information strided_expressions[MAX_STRIDED_EXPRESSIONS];
+   unsigned number_of_strided_expr;
+
+   delayed_ubo_assign_detector(const hash_table *);
+   delayed_ubo_assign_detector(const delayed_ubo_assign_detector&);
+   static const hash_table* prepopulate(struct gl_shader_program *, struct gl_shader *);
+   static const unsigned* linearise_type_tree(void *, union gl_variable_storage *, const glsl_type *const,size_t&, unsigned cumuled_offset = 0);
+
+   const struct hash_table *ubo_refs;
+
+   void update_offset(const glsl_type *const);
+
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+   virtual ir_visitor_status visit_leave(ir_dereference_record *);
+   virtual ir_visitor_status visit_leave(ir_dereference_array *);
+   virtual ir_visitor_status visit_enter(ir_expression *);
+};
+
+delayed_ubo_assign_detector::delayed_ubo_assign_detector(const hash_table* t)
+   : ubo_index(0), has_ubo_in_it(false), subtype(NULL), offset(0),
+     number_of_strided_expr(0), ubo_refs(t)
+{
+}
+
+/* Copying shares only the name table; traversal state starts out clean. */
+delayed_ubo_assign_detector::delayed_ubo_assign_detector(const delayed_ubo_assign_detector &orig)
+   : ubo_index(0), has_ubo_in_it(false), subtype(NULL), offset(0),
+     number_of_strided_expr(0), ubo_refs(orig.ubo_refs)
+{
+}
+
+/**
+ * Build a string-keyed table that maps the name of every variable of every
+ * uniform buffer object in \c shader_program to its storage description.
+ * The table comes from hash_table_ctor(); this function does not free it.
+ */
+const struct hash_table*
+delayed_ubo_assign_detector::prepopulate(struct gl_shader_program *shader_program, struct gl_shader *shader)
+{
+   struct hash_table* retval = hash_table_ctor(0,hash_table_string_hash,hash_table_string_compare);
+   for (unsigned ubo_id = 0; ubo_id < shader_program->UBOCount; ++ubo_id) {
+      const gl_uniform_buffer_object& current_ubo = shader_program->UniformBufferObject[ubo_id];
+      for (unsigned ubo_var = 0; ubo_var < current_ubo.NumberOfVariables; ++ubo_var) {
+         hash_table_insert(retval,
+                           (void*) current_ubo.Variables[ubo_var].Storage,
+                           current_ubo.Variables[ubo_var].Name);
+      }
+   }
+
+   return retval;
+}
+
+/**
+ * Flatten the storage tree of \c type into an array of byte offsets, one per
+ * scalar/vector leaf, allocated from \c ctx.  The number of entries is
+ * stored in \c size.  \c cumuled_offset is added to every leaf offset.
+ * Returns NULL with \c size == 0 for types that are not handled here.
+ */
+const unsigned*
+delayed_ubo_assign_detector::linearise_type_tree(void *ctx, union gl_variable_storage *storage_tree, const glsl_type *const type, size_t &size, unsigned cumuled_offset)
+{
+   size = 0;
+
+   if (type->is_record()) {
+      unsigned* retvalue = (unsigned*) ralloc_array_size(ctx,sizeof(unsigned),1);
+      for (unsigned i = 0; i < type->length; i++) {
+         size_t tmp_sz = 0;
+         const unsigned* tmp = linearise_type_tree(ctx, storage_tree->AsRecord.Fields[i], type->fields.structure[i].type, tmp_sz, cumuled_offset);
+         if (tmp == NULL || tmp_sz == 0)
+            continue;
+         retvalue = (unsigned*) reralloc_array_size(ctx,retvalue,sizeof(unsigned),size + tmp_sz);
+         memcpy(retvalue + size,tmp,tmp_sz * sizeof(unsigned));
+         size += tmp_sz;
+      }
+      return retvalue;
+   }
+   if (type->is_array() && type->fields.array->is_record()) {
+      unsigned* retvalue = (unsigned*) ralloc_array_size(ctx,sizeof(unsigned),1);
+      for (unsigned i = 0; i < type->length; i++) {
+         size_t tmp_sz = 0;
+         /* Element i sits i strides past the first element; keep any offset
+          * inherited from enclosing aggregates. */
+         const unsigned* tmp = linearise_type_tree(ctx, storage_tree->AsArray.FirstElement, type->fields.array, tmp_sz, cumuled_offset + i * storage_tree->AsArray.Stride);
+         if (tmp == NULL || tmp_sz == 0)
+            continue;
+         retvalue = (unsigned*) reralloc_array_size(ctx,retvalue,sizeof(unsigned),size + tmp_sz);
+         memcpy(retvalue + size,tmp,tmp_sz * sizeof(unsigned));
+         size += tmp_sz;
+      }
+      return retvalue;
+   }
+   if (type->is_scalar() || type->is_vector()) {
+      /* A leaf contributes exactly one offset. */
+      unsigned* retvalue = (unsigned*) ralloc_array_size(ctx,sizeof(unsigned),1);
+      retvalue[0] = storage_tree->AsLeaf.Offset + cumuled_offset;
+      size = 1;
+      return retvalue;
+   }
+   if (type->is_array()) {
+      size = type->length;
+      unsigned* retvalue = (unsigned*) ralloc_array_size(ctx,sizeof(unsigned),size);
+      for (unsigned i = 0; i < size; i++) {
+         retvalue[i] = storage_tree->AsArray.FirstElement->AsLeaf.Offset + i * storage_tree->AsArray.Stride + cumuled_offset;
+      }
+      return retvalue;
+   }
+   return NULL;
+}
+
+/* Add the leaf's own offset once the traversal reaches a scalar or vector. */
+void delayed_ubo_assign_detector::update_offset(const glsl_type *const type)
+{
+   if (type->is_scalar() || type->is_vector()) {
+      offset += subtype->AsLeaf.Offset;
+   }
+}
+
+/* Root of a dereference chain: decide whether it names a UBO variable. */
+ir_visitor_status delayed_ubo_assign_detector::visit(ir_dereference_variable *ir)
+{
+   ir_variable* var = ir->var;
+   if (var->is_ubo_variable) {
+      subtype = (union gl_variable_storage *) hash_table_find(const_cast<hash_table*>(ubo_refs),var->name);
+      if (subtype != NULL) {
+         has_ubo_in_it = true;
+         ubo_index = var->location;
+         offset = 0;
+         update_offset(ir->type);
+         return visit_continue;
+      }
+   }
+   /* Not a UBO variable, or one without a storage description. */
+   has_ubo_in_it = false;
+   return visit_stop;
+}
+
+/* Step into the named field of the record currently described by subtype. */
+ir_visitor_status delayed_ubo_assign_detector::visit_leave(ir_dereference_record *ir)
+{
+   const glsl_type *const record_type = ir->record->type;
+   const glsl_type* field_type = NULL;
+   for (unsigned i = 0; i < record_type->length; i++) {
+      if (strcmp(record_type->fields.structure[i].name, ir->field) == 0) {
+         subtype = subtype->AsRecord.Fields[i];
+         field_type = record_type->fields.structure[i].type;
+         break;
+      }
+   }
+   if (field_type == NULL) {
+      /* Unknown field: give up rather than read an unset type. */
+      has_ubo_in_it = false;
+      return visit_stop;
+   }
+   update_offset(field_type);
+   return visit_continue;
+}
+
+/* Apply one array subscript: fold constants, queue everything else. */
+ir_visitor_status delayed_ubo_assign_detector::visit_leave(ir_dereference_array *ir)
+{
+   unsigned stride = subtype->AsArray.Stride;
+   const glsl_type* field_type = ir->array->type->fields.array;
+   ir_constant *idx = ir->array_index->as_constant();
+   if (idx) {
+      offset += stride * idx->value.i[0];
+   }
+   else {
+      if (number_of_strided_expr >= MAX_STRIDED_EXPRESSIONS) {
+         /* Do not write past strided_expressions[]. */
+         assert(!"too many non-constant subscripts in UBO dereference");
+         has_ubo_in_it = false;
+         return visit_stop;
+      }
+      strided_expressions[number_of_strided_expr].stride = stride;
+      strided_expressions[number_of_strided_expr].expr = ir->array_index;
+      number_of_strided_expr++;
+   }
+   subtype = subtype->AsArray.FirstElement;
+   update_offset(field_type);
+   return visit_continue;
+}
+
+/* Entering an expression ends the search: report "no UBO" and stop. */
+ir_visitor_status
+delayed_ubo_assign_detector::visit_enter(ir_expression *)
+{
+   has_ubo_in_it = false;
+   return visit_stop;
+}
+
 class glsl_to_tgsi_visitor : public ir_visitor {
 public:
    glsl_to_tgsi_visitor();
@@ -299,9 +512,16 @@ public:
 
    struct gl_context *ctx;
    struct gl_program *prog;
+   struct gl_indexed_ubo_variable *indexed_uniforms;
+   struct string_to_uint_map *indexed_uniforms_name;
+   unsigned starting_index;
    struct gl_shader_program *shader_program;
    struct gl_shader_compiler_options *options;
 
+   delayed_ubo_assign_detector *ubo_handler;
+   struct gl_uniform_buffer_object* ubos;
+   unsigned ubo_number;
+
    int next_temp;
 
    int num_address_regs;
@@ -326,6 +546,9 @@ public:
    st_src_reg st_src_reg_for_int(int val);
    st_src_reg st_src_reg_for_type(int type, int val);
 
+   st_src_reg generate_ubo_access_src_reg(unsigned, unsigned, const st_src_reg *index_reg = NULL);
+   st_src_reg get_reg_from_stride(ir_instruction *ir,const stride_information *, unsigned);
+   bool try_ubo_access(ir_rvalue *ir);
    /**
     * \name Visit methods
     *
@@ -1894,12 +2117,105 @@ glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
    this->result = src;
 }
 
+/**
+ * Emit code that accumulates expr * (stride / 16) for every entry
+ * of \c stride_info and return the temporary holding the sum: the
+ * dynamic part of a UBO address, in vec4 units.
+ */
+struct st_src_reg
+glsl_to_tgsi_visitor::get_reg_from_stride(ir_instruction *ir,const stride_information *stride_info, unsigned stride_count)
+{
+   const glsl_type *const index_type = native_integers ?
+      glsl_type::int_type : glsl_type::float_type;
+   /* Integer opcodes are only usable with native integer support. */
+   const unsigned mul_op = native_integers ? TGSI_OPCODE_UMUL : TGSI_OPCODE_MUL;
+   const unsigned add_op = native_integers ? TGSI_OPCODE_UADD : TGSI_OPCODE_ADD;
+   st_src_reg index_reg = get_temp(index_type);
+
+   for (unsigned i = 0; i < stride_count; i++) {
+      stride_info[i].expr->accept(this);
+      st_src_reg scale = st_src_reg_for_type(GLSL_TYPE_INT, stride_info[i].stride / 16);
+      if (i == 0) {
+         emit(ir, mul_op, st_dst_reg(index_reg), this->result, scale);
+      }
+      else {
+         st_src_reg tmp_stride = get_temp(index_type);
+         emit(ir, mul_op, st_dst_reg(tmp_stride), this->result, scale);
+         emit(ir, add_op, st_dst_reg(index_reg), tmp_stride, index_reg);
+      }
+   }
+
+   return index_reg;
+}
+
+/**
+ * Build the source register for the UBO member at byte \c offset of
+ * constant buffer \c index2d.  When \c stride_reg is given, a copy of it
+ * is attached as the relative address.
+ */
+struct st_src_reg
+glsl_to_tgsi_visitor::generate_ubo_access_src_reg(unsigned offset, unsigned index2d, const st_src_reg *stride_reg)
+{
+   unsigned index = offset / 16;
+
+   st_src_reg result = st_src_reg(PROGRAM_UNIFORM,index,0,index2d);
+   /* Start at the addressed component so that vectors which do not begin
+    * at .x read their own components; trailing lanes repeat .w. */
+   switch(offset - index * 16) {
+   case 12:
+      result.swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
+      break;
+   case 8:
+      result.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
+      break;
+   case 4:
+      result.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_W);
+      break;
+   default:
+      result.swizzle = SWIZZLE_XYZW;
+      break;
+   }
+
+   if (stride_reg) {
+      result.reladdr = ralloc(mem_ctx, st_src_reg);
+      memcpy(result.reladdr, stride_reg, sizeof(*stride_reg));
+   }
+
+   return result;
+}
+
+/**
+ * If \c ir is a dereference rooted at a UBO variable, store the matching
+ * constant-buffer access in this->result and return true.  Otherwise
+ * return false without touching this->result.
+ */
+bool
+glsl_to_tgsi_visitor::try_ubo_access(ir_rvalue *ir)
+{
+   delayed_ubo_assign_detector tmp_ubo(*ubo_handler);
+   ir->accept(&tmp_ubo);
+   if (!tmp_ubo.has_ubo_in_it)
+      return false;
+
+   st_src_reg stride_reg;
+   st_src_reg *stride_reg_ptr = NULL;
+   if (tmp_ubo.number_of_strided_expr > 0) {
+      stride_reg = get_reg_from_stride(ir,tmp_ubo.strided_expressions,tmp_ubo.number_of_strided_expr);
+      stride_reg_ptr = &stride_reg;
+   }
+   this->result = generate_ubo_access_src_reg(tmp_ubo.offset,tmp_ubo.ubo_index,stride_reg_ptr);
+   return true;
+}
+
 void
 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
 {
    variable_storage *entry = find_variable_storage(ir->var);
    ir_variable *var = ir->var;
 
+   if (try_ubo_access(ir))
+      return;
+
    if (!entry) {
       switch (var->mode) {
       case ir_var_uniform:
@@ -1960,6 +2276,9 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
    st_src_reg src;
    int element_size = type_size(ir->type);
 
+   if (try_ubo_access(ir))
+      return;
+
    index = ir->array_index->constant_expression_value();
 
    ir->array->accept(this);
@@ -2019,6 +2338,9 @@ glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
    const glsl_type *struct_type = ir->record->type;
    int offset = 0;
 
+   if (try_ubo_access(ir))
+      return;
+
    ir->record->accept(this);
 
    for (i = 0; i < struct_type->length; i++) {
@@ -2154,11 +2476,37 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
    st_dst_reg l;
    st_src_reg r;
    int i;
+   l = get_assignment_lhs(ir->lhs, this);
+
+   /* Aggregate copies out of a UBO are expanded into one MOV per leaf. */
+   if (ir->lhs->type->is_record() || ir->lhs->type->is_array()) {
+      delayed_ubo_assign_detector tmp_ubo(*ubo_handler);
+      ir->rhs->accept(&tmp_ubo);
+      if (tmp_ubo.has_ubo_in_it) {
+         st_src_reg stride_reg;
+         st_src_reg *stride_reg_ptr = NULL;
+         if (tmp_ubo.number_of_strided_expr > 0) {
+            stride_reg = get_reg_from_stride(ir,tmp_ubo.strided_expressions,tmp_ubo.number_of_strided_expr);
+            stride_reg_ptr = &stride_reg;
+         }
+         void* tmp_ctx = ralloc_context(NULL);
+         size_t sz = 0;
+         /* tmp_ubo.offset carries the constant subscripts applied on the way
+          * down to the aggregate; every leaf is shifted by it. */
+         const unsigned* tmp = delayed_ubo_assign_detector::linearise_type_tree(tmp_ctx,tmp_ubo.subtype,ir->lhs->type,sz,tmp_ubo.offset);
+         for (size_t leaf = 0; tmp != NULL && leaf < sz; leaf++) {
+            r = generate_ubo_access_src_reg(tmp[leaf],tmp_ubo.ubo_index,stride_reg_ptr);
+            emit(ir, TGSI_OPCODE_MOV, l, r);
+            l.index++;
+         }
+         ralloc_free(tmp_ctx);
+         return;
+      }
+   }
 
    ir->rhs->accept(this);
    r = this->result;
 
-   l = get_assignment_lhs(ir->lhs, this);
 
    /* FINISHME: This should really set to the correct maximal writemask for each
     * FINISHME: component written (in the loops below). This case can only
@@ -3321,6 +3669,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
              */
             inst->src[r].file = first->src[0].file;
             inst->src[r].index = first->src[0].index;
+            inst->src[r].index2d = first->src[0].index2d;
 
             int swizzle = 0;
             for (int i = 0; i < 4; i++) {
@@ -4069,10 +4418,11 @@ dst_register(struct st_translate *t,
  */
 static struct ureg_src
 src_register(struct st_translate *t,
-             gl_register_file file,
-             GLuint index)
+             const st_src_reg* src_reg)
 {
-   switch(file) {
+   int index = src_reg->index;
+   int index2d = src_reg->index2d;
+   switch(src_reg->file) {
    case PROGRAM_UNDEFINED:
       return ureg_src_undef();
 
@@ -4088,13 +4438,11 @@ src_register(struct st_translate *t,
    case PROGRAM_LOCAL_PARAM:
    case PROGRAM_UNIFORM:
       assert(index >= 0);
-      return t->constants[index];
+      return ureg_src_register2d(TGSI_FILE_CONSTANT,index,index2d);
    case PROGRAM_STATE_VAR:
    case PROGRAM_CONSTANT:       /* ie, immediate */
-      if (index < 0)
-         return ureg_DECL_constant(t->ureg, 0);
-      else
-         return t->constants[index];
+      assert(index >= 0);
+      return ureg_src_register(TGSI_FILE_CONSTANT,index);
 
    case PROGRAM_IMMEDIATE:
       return t->immediates[index];
@@ -4149,7 +4497,7 @@ translate_dst(struct st_translate *t,
 static struct ureg_src
 translate_src(struct st_translate *t, const st_src_reg *src_reg)
 {
-   struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
+   struct ureg_src src = src_register(t, src_reg);
 
    src = ureg_swizzle(src,
                       GET_SWZ(src_reg->swizzle, 0) & 0x3,
@@ -4673,6 +5021,16 @@ st_translate_program(
       }
    }
 
+   /* Declare one 2-D constant range per UBO; the last index is inclusive.
+    * NOTE(review): buffers are declared at slot i+1, the shader addresses
+    * them through var->location and the upload path binds UBOIndexes[i];
+    * confirm that the three agree. */
+   for (i = 0; i < program->ubo_number ; i++) {
+      const gl_uniform_buffer_object& ubo = program->ubos[i];
+      if (ubo.Size > 0)
+         ureg_DECL_constant2D(t->ureg, 0, (ubo.Size + 15) / 16 - 1, i + 1);
+   }
+
    if (program->indirect_addr_temps) {
       /* If temps are accessed with indirect addressing, declare temporaries
        * in sequential order. Else, we declare them on demand elsewhere.
@@ -4684,45 +5042,13 @@ st_translate_program(
       }
    }
 
-   /* Emit constants and uniforms.  TGSI uses a single index space for these,
+   /* Emit constants and uniforms. TGSI uses a single index space for these,
     * so we put all the translated regs in t->constants.
     */
    if (proginfo->Parameters) {
-      t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
-      if (t->constants == NULL) {
-         ret = PIPE_ERROR_OUT_OF_MEMORY;
-         goto out;
-      }
-
-      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
-         switch (proginfo->Parameters->Parameters[i].Type) {
-         case PROGRAM_ENV_PARAM:
-         case PROGRAM_LOCAL_PARAM:
-         case PROGRAM_STATE_VAR:
-         case PROGRAM_NAMED_PARAM:
-         case PROGRAM_UNIFORM:
-            t->constants[i] = ureg_DECL_constant(ureg, i);
-            break;
-
-         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
-          * addressing of the const buffer.
-          * FIXME: Be smarter and recognize param arrays:
-          * indirect addressing is only valid within the referenced
-          * array.
-          */
-         case PROGRAM_CONSTANT:
-            if (program->indirect_addr_consts)
-               t->constants[i] = ureg_DECL_constant(ureg, i);
-            else
-               t->constants[i] = emit_immediate(t,
-                                                proginfo->Parameters->ParameterValues[i],
-                                                proginfo->Parameters->Parameters[i].DataType,
-                                                4);
-            break;
-         default:
-            break;
-         }
-      }
+      /* Declare all parameters as one range; the last index is inclusive. */
+      if (proginfo->Parameters->NumParameters > 0)
+         ureg_DECL_constant2D(t->ureg, 0, proginfo->Parameters->NumParameters - 1, 0);
    }
 
    /* Emit immediate values.
@@ -4806,6 +5132,15 @@ get_mesa_program(struct gl_context *ctx,
                  struct gl_shader *shader)
 {
    glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
+   v->indexed_uniforms = shader_program->IndexedUBOVariables;
+   v->indexed_uniforms_name = shader_program->NamedAccessUBOVariables;
+   v->starting_index = shader_program->NumUserUniformStorage;
+   /* FINISHME: nothing in this patch releases the detector or its name table. */
+   v->ubo_handler = new delayed_ubo_assign_detector(
+      delayed_ubo_assign_detector::prepopulate(shader_program,shader));
+   v->ubos = shader_program->UniformBufferObject;
+   v->ubo_number = shader_program->UBOCount;
+
    struct gl_program *prog;
    struct pipe_screen * screen = st_context(ctx)->pipe->screen;
    unsigned pipe_shader_type;
-- 
1.7.7

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev