From: Samuel Iglesias Gonsalvez <sigles...@igalia.com> Note that Skylake requires a header in the sampler message, so this code will need some tweaks to work there.
Signed-off-by: Samuel Iglesias Gonsalvez <sigles...@igalia.com> --- src/glsl/lower_ubo_reference.cpp | 182 +++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_defines.h | 3 + src/mesa/drivers/dri/i965/brw_shader.cpp | 3 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4.h | 6 + src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 31 ++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 49 ++++++ 7 files changed, 275 insertions(+) diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index 8cb28f5..a5aaf07 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -167,6 +167,16 @@ public: ir_ssbo_store *ssbo_write(ir_rvalue *deref, ir_rvalue *offset, unsigned write_mask); + ir_visitor_status visit_enter(class ir_expression *); + void check_ssbo_unsized_array_length_expression(class ir_expression *); + void check_ssbo_unsized_array_length_assignment(ir_assignment *ir); + + ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, ir_dereference *, ir_variable *); + ir_expression *emit_ssbo_unsized_array_length(ir_variable *base_offset, + unsigned int deref_offset, + unsigned int unsized_array_stride); + unsigned calculate_unsized_array_stride(ir_dereference *deref); + void *mem_ctx; struct gl_shader *shader; struct gl_uniform_buffer_variable *ubo_var; @@ -766,6 +776,177 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref, row_major, matrix_columns, write_mask); } +ir_visitor_status +lower_ubo_reference_visitor::visit_enter(ir_expression *ir) +{ + check_ssbo_unsized_array_length_expression(ir); + return rvalue_visit(ir); +} + +void +lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *former_ir) +{ + if (former_ir->operation == ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { + /* Don't replace this unop if it is found alone. 
It is going to be + * removed by the optimization passes or replaced if it is part of + * an ir_assignment or another ir_expression. + */ + return; + } + + for (unsigned i = 0; i < 4; i++) { + if (!former_ir->operands[i] || former_ir->operands[i]->ir_type != ir_type_expression) + continue; + ir_expression *ir = (ir_expression *) former_ir->operands[i]; + if (ir->operation == ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { + ir_rvalue *rvalue = ir->operands[0]->as_rvalue(); + if (!rvalue || !rvalue->type->is_array() || !rvalue->type->is_unsized_array()) + return; + + ir_dereference *deref = ir->operands[0]->as_dereference(); + if (!deref) + return; + + ir_variable *var = ir->operands[0]->variable_referenced(); + if (!var || !var->is_in_shader_storage_block()) + return; + /* Now replace the unop instruction for the triop */ + ir_expression *temp = process_ssbo_unsized_array_length(&rvalue, deref, var); + delete ir; + former_ir->operands[i] = temp; + } + } +} + +void +lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir) +{ + if (!ir->rhs || ir->rhs->ir_type != ir_type_expression) + return; + + ir_expression *expr = (ir_expression *) ir->rhs; + if (expr->operation == ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { + ir_rvalue *rvalue = expr->operands[0]->as_rvalue(); + if (!rvalue || !rvalue->type->is_array() || !rvalue->type->is_unsized_array()) + return; + + ir_dereference *deref = expr->operands[0]->as_dereference(); + if (!deref) + return; + + ir_variable *var = expr->operands[0]->variable_referenced(); + if (!var || !var->is_in_shader_storage_block()) + return; + /* Now replace the unop instruction for the binop */ + ir_expression *temp = process_ssbo_unsized_array_length(&rvalue, deref, var); + delete expr; + ir->rhs = temp; + return; + } + return; +} + +ir_expression * +lower_ubo_reference_visitor::emit_ssbo_unsized_array_length(ir_variable *base_offset, + unsigned int deref_offset, + unsigned int 
unsized_array_stride) +{ + ir_rvalue *offset = + add(base_offset, new(mem_ctx) ir_constant(deref_offset)); + ir_rvalue *stride = new(mem_ctx) ir_constant(unsized_array_stride); + ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); + return new(mem_ctx) ir_expression(ir_triop_ssbo_unsized_array_length, + glsl_type::int_type, block_ref, offset, stride); +} + +unsigned +lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref) +{ + unsigned array_stride = 0; + + switch (deref->ir_type) { + case ir_type_dereference_variable: + { + ir_dereference_variable *deref_var = (ir_dereference_variable *)deref; + const struct glsl_type *unsized_array_type = NULL; + /* An unsized array can be sized by other lowering passes, so pick + * the first field of the array which has the data type of the unsized + * array. + */ + unsized_array_type = deref_var->var->type->fields.array; + + /* Whether or not the field is row-major (because it might be a + * bvec2 or something) does not affect the array itself. We need + * to know whether an array element in its entirety is row-major. 
+ */ + const bool array_row_major = + is_dereferenced_thing_row_major(deref_var); + + array_stride = unsized_array_type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + break; + } + case ir_type_dereference_record: + { + ir_dereference_record *deref_record = (ir_dereference_record *) deref; + const struct glsl_type *deref_record_type = + deref_record->record->as_dereference()->type; + unsigned record_length = deref_record_type->length; + /* Unsized array is always the last element of the interface */ + const struct glsl_type *unsized_array_type = + deref_record_type->fields.structure[record_length - 1].type->fields.array; + + const bool array_row_major = + is_dereferenced_thing_row_major(deref_record); + array_stride = unsized_array_type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + break; + } + default: + assert(!"Not reached"); + } + return array_stride; +} + +ir_expression * +lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue, + ir_dereference *deref, + ir_variable *var) +{ + mem_ctx = ralloc_parent(*rvalue); + + ir_rvalue *offset = NULL; + unsigned const_offset; + bool row_major; + int matrix_columns; + bool is_buffer; + unsigned unsized_array_stride = calculate_unsized_array_stride(deref); + + /* Compute the offset to the start of the dereference as well as other + * information we need to configure the length + */ + setup_for_load_or_write(var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, + &is_buffer); + assert(offset && is_buffer); + + /* Now that we've calculated the offset to the start of the + * dereference, store it in a temporary for the length expression + */ + ir_variable *write_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "ssbo_length_temp_offset", + ir_var_temporary); + base_ir->insert_after(write_offset); + base_ir->insert_after(assign(write_offset, offset)); + + ir_expression *new_ssbo = 
emit_ssbo_unsized_array_length(write_offset, + const_offset, + unsized_array_stride); + + return new_ssbo; +} + void lower_ubo_reference_visitor::check_for_ssbo_write(ir_assignment *ir) { @@ -805,6 +986,7 @@ lower_ubo_reference_visitor::check_for_ssbo_write(ir_assignment *ir) ir_visitor_status lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) { + check_ssbo_unsized_array_length_assignment(ir); check_for_ssbo_write(ir); return rvalue_visit(ir); } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 11cb3fa..bb2c12d 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -971,6 +971,9 @@ enum opcode { VS_OPCODE_PULL_CONSTANT_LOAD, VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, + + VS_OPCODE_UNSIZED_ARRAY_LENGTH, + VS_OPCODE_UNPACK_FLAGS_SIMD4X2, /** diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index c1fd859..ff95afa 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -598,6 +598,9 @@ brw_instruction_name(enum opcode op) case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: return "set_simd4x2_header_gen9"; + case VS_OPCODE_UNSIZED_ARRAY_LENGTH: + return "vs_unsized_array_length"; + case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: return "unpack_flags_simd4x2"; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index b83d7bb..780e50a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -326,6 +326,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: + case VS_OPCODE_UNSIZED_ARRAY_LENGTH: return inst->header_size; default: unreachable("not reached"); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index f24107c..3d3fcc7 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -507,6 +507,12 @@ private: struct brw_reg offset); void generate_set_simd4x2_header_gen9(vec4_instruction *inst, struct brw_reg dst); + + void generate_unsized_array_length(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index); + void generate_unpack_flags(struct brw_reg dst); struct brw_context *brw; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index e6782fb..d0c8108 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1033,6 +1033,32 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, } void +vec4_generator::generate_unsized_array_length(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg surf_index) +{ + assert(brw->gen >= 7); + assert(surf_index.type == BRW_REGISTER_TYPE_UD && + surf_index.file == BRW_IMMEDIATE_VALUE); + + brw_SAMPLE(p, + dst, + inst->base_mrf, + src, + surf_index.dw1.ud, + 0, + GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO, + 1, /* response length */ + inst->mlen, + inst->header_size > 0, + BRW_SAMPLER_SIMD_MODE_SIMD4X2, + BRW_SAMPLER_RETURN_FORMAT_SINT32); + + brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); +} + +void vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, struct brw_reg dst, struct brw_reg surf_index, @@ -1405,6 +1431,11 @@ vec4_generator::generate_code(const cfg_t *cfg) generate_set_simd4x2_header_gen9(inst, dst); break; + + case VS_OPCODE_UNSIZED_ARRAY_LENGTH: + generate_unsized_array_length(inst, dst, src[0], src[1]); + break; + case GS_OPCODE_URB_WRITE: generate_gs_urb_write(inst); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 4c83ecf..5268582 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1554,6 +1554,9 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_noise: unreachable("not reached: should be handled by lower_noise"); + case ir_unop_ssbo_unsized_array_length: + unreachable("not reached: should be handled by lower_ubo_reference"); + case ir_binop_add: emit(ADD(result_dst, op[0], op[1])); break; @@ -1927,6 +1930,52 @@ vec4_visitor::visit(ir_expression *ir) emit(BFE(result_dst, op[2], op[1], op[0])); break; + case ir_triop_ssbo_unsized_array_length: { + ir_constant *const_uniform_block = ir->operands[0]->as_constant(); + unsigned ubo_index = const_uniform_block->value.u[0]; + ir_constant *const_offset_ir = ir->operands[1]->as_constant(); + int const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0; + ir_constant *const_stride_ir = ir->operands[2]->as_constant(); + int unsized_array_stride = const_stride_ir ? const_stride_ir->value.u[0] : 1; + + assert(shader->base.UniformBlocks[ubo_index].IsBuffer); + + src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start + + ubo_index); + + dst_reg buffer_size = dst_reg(this, ir->type); + + vec4_instruction *inst = new(mem_ctx) vec4_instruction( + VS_OPCODE_UNSIZED_ARRAY_LENGTH, buffer_size); + + inst->base_mrf = 2; + inst->mlen = 1; /* always at least one */ + inst->src[1] = src_reg(surf_index); + + /* MRF for the first parameter */ + src_reg lod = src_reg(0); + int param_base = inst->base_mrf; + int writemask = WRITEMASK_X; + emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod)); + + emit(inst); + + /* array.length() = + max((buffer_object_size - offset_of_array) / stride_of_array, 0) */ + emit(ADD(buffer_size, src_reg(buffer_size), brw_imm_d(-const_offset))); + + assert(unsized_array_stride > 0); + + src_reg stride = src_reg(unsized_array_stride); + dst_reg temp = dst_reg(this, glsl_type::int_type); + emit_math(SHADER_OPCODE_INT_QUOTIENT, + temp, + src_reg(buffer_size), + stride); + 
emit_minmax(BRW_CONDITIONAL_GE, result_dst, src_reg(temp), brw_imm_d(0)); + break; + } + case ir_triop_vector_insert: unreachable("should have been lowered by lower_vector_insert"); -- 1.9.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev