Before you read any further: this is nowhere close to working. However, it's in a state where I think most of the structure is there, albeit with a lot of XXX comments, and I haven't actually implemented the new opcodes I've added.
I was hoping one or two Intel people could take a look at this and let me know of any pitfalls I'm likely to run into. I've already gotten a lot of help and advice from Ken, but wanted to put something out publicly. Any and all feedback much appreciated! Not-Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_defines.h | 13 + src/mesa/drivers/dri/i965/brw_vec4.h | 1 + src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 36 ++- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 5 +- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++ src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 285 ++++++++++++++++++++++ src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 63 +++++ 8 files changed, 405 insertions(+), 13 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp create mode 100644 src/mesa/drivers/dri/i965/gen6_gs_visitor.h diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index dc30eb3..96b637f 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -119,6 +119,7 @@ i965_FILES = \ gen6_clip_state.c \ gen6_depthstencil.c \ gen6_gs_state.c \ + gen6_gs_visitor.cpp \ gen6_multisample_state.c \ gen6_queryobj.c \ gen6_sampler_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 01d3cb6..a24919c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -909,6 +909,19 @@ enum opcode { * - dst is the GRF for gl_InvocationID. */ GS_OPCODE_GET_INSTANCE_ID, + + /** + * Sets DWORD 2 of dst to the value in src DWORD 0. Used by geometry + * shaders to initialize DWORD 2 of the message header, which contains + * primitive start/end flags. + */ + GS_OPCODE_SET_DWORD_2, + + /** + * Emits a FF_SYNC, which on Gen6 returns a VUE handle, which is needed to + * emit state in GS. 
+ */ + GS_OPCODE_FF_SYNC, }; enum brw_urb_write_flags { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 6bd8b80..14d67b7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -130,6 +130,7 @@ public: bool is_one() const; src_reg(class vec4_visitor *v, const struct glsl_type *type); + src_reg(class vec4_visitor *v, const struct glsl_type *type, int size); explicit src_reg(dst_reg reg); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 0a2d8ff..ede9002 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -28,6 +28,7 @@ */ #include "brw_vec4_gs_visitor.h" +#include "gen6_gs_visitor.h" const unsigned MAX_GS_INPUT_VERTICES = 6; @@ -587,6 +588,8 @@ brw_gs_emit(struct brw_context *brw, { struct brw_shader *shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; + vec4_gs_visitor *gs; + const unsigned *ret = NULL; if (unlikely(INTEL_DEBUG & DEBUG_GS)) { printf("GLSL IR for native geometry shader %d:\n", prog->Name); @@ -602,12 +605,17 @@ brw_gs_emit(struct brw_context *brw, likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { c->prog_data.dual_instanced_dispatch = false; - vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */); - if (v.run()) { - return generate_assembly(brw, prog, &c->gp->program.Base, - &c->prog_data.base, mem_ctx, &v.instructions, - final_assembly_size); + if (brw->gen >= 7) + gs = new vec4_gs_visitor(brw, c, prog, shader, mem_ctx, true /* no_spills */); + else + gs = new gen6_gs_visitor(brw, c, prog, shader, mem_ctx, true /* no_spills */); + if (gs->run()) { + ret = generate_assembly(brw, prog, &c->gp->program.Base, + &c->prog_data.base, mem_ctx, &gs->instructions, + final_assembly_size); + goto done; } + delete gs; } /* Either we failed to compile in DUAL_OBJECT mode (probably because it @@ -622,15 
+630,21 @@ brw_gs_emit(struct brw_context *brw, */ c->prog_data.dual_instanced_dispatch = true; - vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, false /* no_spills */); - if (!v.run()) { + if (brw->gen >= 7) + gs = new vec4_gs_visitor(brw, c, prog, shader, mem_ctx, false /* no_spills */); + else + gs = new gen6_gs_visitor(brw, c, prog, shader, mem_ctx, false /* no_spills */); + if (!gs->run()) { prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, v.fail_msg); - return NULL; + ralloc_strcat(&prog->InfoLog, gs->fail_msg); + goto done; } - return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, - mem_ctx, &v.instructions, final_assembly_size); + ret = generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, + mem_ctx, &gs->instructions, final_assembly_size); +done: + delete gs; + return ret; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index 68756f7..7a4a262 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -96,14 +96,15 @@ protected: virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + src_reg vertex_count; + const struct brw_gs_compile * const c; + private: int setup_varying_inputs(int payload_reg, int *attribute_map, int attributes_per_reg); void emit_control_data_bits(); - src_reg vertex_count; src_reg control_data_bits; - const struct brw_gs_compile * const c; }; } /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 601b364..0ebf118 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -616,6 +616,20 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) this->type = brw_type_for_base_type(type); } +src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) +{ + assert(size > 0); 
+ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type) * size); + + this->swizzle = BRW_SWIZZLE_NOOP; + + this->type = brw_type_for_base_type(type); +} + dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) { init(); diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp new file mode 100644 index 0000000..7be04d0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -0,0 +1,285 @@ +/* + * Copyright © 2014 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file gen6_gs_visitor.cpp + * + * Gen6 geometry-shader-specific code, derived from the Gen7+ vec4_gs_visitor. 
+ */
+
+#include "gen6_gs_visitor.h"
+
+namespace brw {
+
+void
+gen6_gs_visitor::emit_prolog()
+{
+   vec4_gs_visitor::emit_prolog();
+
+   /* vertex_output layout:
+    *
+    * This is an array that contains all the data emitted during the runtime
+    * of this GP. For each emitted vertex, there are vue_map.num_slots data
+    * items, and an extra register used to store flags. This register comes
+    * after the first num_slots. To match the URB_WRITE message header, bit 0
+    * is PrimEnd, and bit 1 is PrimStart. The next vertex then continues
+    * afterwards.
+    */
+   this->vertex_output = src_reg(this, glsl_type::uint_type,
+                                 (prog_data->vue_map.num_slots + 1) *
+                                 c->gp->program.VerticesOut);
+   this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
+
+   this->first_vertex = src_reg(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(this->first_vertex), src_reg(2u)));
+}
+
+void
+gen6_gs_visitor::visit(ir_emit_vertex *)
+{
+   /* Buffer this vertex (its VUE slots, then one flags entry) in
+    * vertex_output. Nothing is stored once VerticesOut has been reached. */
+   unsigned num_output_vertices = c->gp->program.VerticesOut;
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+   {
+      for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
+         dst_reg dst(this->vertex_output);
+         dst.reladdr = ralloc(mem_ctx, src_reg);
+         memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+         /* xxx generalize emit_urb_slot and use that, otherwise gl_Position &
+          * co don't work */
+         emit_generic_urb_slot(dst, prog_data->vue_map.slot_to_varying[slot]);
+         emit(ADD(dst_reg(this->vertex_output_offset),
+                  this->vertex_output_offset, src_reg(1u)));
+      }
+
+      /* XXX write the flags in a format that will make implementation of the
+       * "write flags" opcode easy.
+       */
+      dst_reg dst(this->vertex_output);
+      dst.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+      if (c->gp->program.OutputType == GL_POINTS) {
+         /* Each point both starts and ends a primitive */
+         emit(MOV(dst, src_reg(3u)));
+      } else {
+         /* Set the flags to first_vertex, which will be set to 2 when it's
+          * the first, or 0 if it's not the first. Zero out first_vertex so
+          * that future runs will work correctly as well.
+          */
+         emit(MOV(dst, this->first_vertex));
+         emit(MOV(dst_reg(this->first_vertex), src_reg(0u)));
+      }
+
+      emit(ADD(dst_reg(this->vertex_output_offset),
+               this->vertex_output_offset, src_reg(1u)));
+
+      emit(ADD(dst_reg(this->vertex_count),
+               this->vertex_count, src_reg(1u)));
+   }
+   emit(BRW_OPCODE_ENDIF);
+}
+
+void
+gen6_gs_visitor::visit(ir_end_primitive *)
+{
+   /* This has no effect for GL_POINTS */
+   if (c->gp->program.OutputType == GL_POINTS)
+      return;
+
+   /* Flag the most recently emitted vertex as ending the primitive.  With no
+    * vertex emitted yet there is nothing to flag (offset - 1 would wrap). */
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(0u), BRW_CONDITIONAL_NEQ));
+   emit(IF(BRW_PREDICATE_NORMAL));
+   {
+      /* vertex_output_offset is already pointing at the first entry of the
+       * next vertex. So subtract 1 to modify the flags for the previous
+       * vertex.
+       */
+      src_reg offset(this, glsl_type::uint_type);
+      emit(MOV(dst_reg(offset), this->vertex_output_offset));
+      emit(ADD(dst_reg(offset), offset, src_reg(~0u)));
+
+      src_reg dst(this->vertex_output);
+      dst.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(dst.reladdr, &offset, sizeof(src_reg));
+
+      /* Set the 0 bit to indicate that the primitive is ending */
+      emit(OR(dst_reg(dst), dst, src_reg(1u)));
+
+      /* Set the first vertex flag to indicate that the next vertex will start
+       * a primitive
+       */
+      emit(MOV(dst_reg(this->first_vertex), src_reg(2u)));
+   }
+   emit(BRW_OPCODE_ENDIF);
+}
+
+void
+gen6_gs_visitor::emit_thread_end()
+{
+   /* Now we take all of the vertex data previously written into
+    * vertex_output, and emit it for real. This involves looping through all
+    * the vertices, loading the relevant data back into the MRFs, and emitting
+    * write opcodes.
+    *
+    * Note that this has to be done before move_grf_array_access_to_scratch
+    * runs and does its magic, otherwise we have to manually deal with scratch
+    * space.
+    */
+
+   src_reg i(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(i), src_reg(0u)));
+
+   src_reg offset(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(offset), src_reg(0u)));
+
+   src_reg vue_handle(this, glsl_type::uint_type);
+   src_reg prim_flags(this, glsl_type::uint_type);
+
+   /* XXX check if FF_SYNC really needs the # of primitives generated, if it
+    * does, figure out exactly what it wants a counter of, and count it.
+    */
+   /* XXX look into SO and whether it interacts with this GS logic in any way.
+    */
+   emit(GS_OPCODE_FF_SYNC, dst_reg(vue_handle));
+
+   /* XXX double-check loop logic -- can this be done using a regular
+    * condition instead of a nested if inside an infinite do/while?
+    */
+   emit(BRW_OPCODE_DO);
+   {
+      emit(CMP(dst_null_d(), this->vertex_count, i, BRW_CONDITIONAL_LE));
+      emit(IF(BRW_PREDICATE_NORMAL));
+      {
+         emit(BRW_OPCODE_BREAK);
+      }
+      emit(BRW_OPCODE_ENDIF);
+
+      /* Writes out a single vertex's worth of data. This is basically the same
+       * as vec4_visitor::emit_vertex, except that it's using vertex_output as
+       * its source rather than the varying slots
+       */
+
+      int base_mrf = 1;
+      int mrf = base_mrf;
+      /* MRF 14/15 are used for spill handling */
+      int max_usable_mrf = 13;
+
+      dst_reg base_mrf_reg = dst_reg(MRF, base_mrf);
+      base_mrf_reg.type = BRW_REGISTER_TYPE_UD;
+
+      emit_urb_write_header(mrf++);
+
+      /* XXX double-check that these really are the right primitive ids */
+      unsigned prim_type;
+      switch (c->gp->program.OutputType) {
+      case GL_POINTS:
+         prim_type = _3DPRIM_POINTLIST << 2;
+         break;
+      case GL_LINE_STRIP:
+         prim_type = _3DPRIM_LINESTRIP << 2;
+         break;
+      case GL_TRIANGLE_STRIP:
+         prim_type = _3DPRIM_TRISTRIP << 2;
+         break;
+      default:
+         assert(!"Unexpected output type");
+         prim_type = 0;
+         break;
+      }
+
+      /* Location of the primitive flags for the current vertex */
+      src_reg flag_offset(this, glsl_type::uint_type);
+      emit(ADD(dst_reg(flag_offset),
+               offset, src_reg(prog_data->vue_map.num_slots)));
+
+      /* Pointer to the primitive flag data for the current vertex */
+      src_reg prim_data(this->vertex_output);
+      prim_data.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(prim_data.reladdr, &flag_offset, sizeof(src_reg));
+
+      /* Combine the current vertex prim start/end flags with the primitive
+       * type
+       */
+      emit(OR(dst_reg(prim_flags), prim_data, src_reg(prim_type)));
+
+      emit(GS_OPCODE_SET_DWORD_2, base_mrf_reg, prim_flags);
+
+      src_reg data(this->vertex_output);
+      data.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(data.reladdr, &offset, sizeof(src_reg));
+
+      int slot = 0;
+      bool complete = false;
+      do {
+         /* URB offset is in URB row increments, and each of our MRFs is half of
+          * one of those, since we're doing interleaved writes.
+          */
+         int urb_offset = slot / 2;
+
+         mrf = base_mrf + 1;
+         for (; slot < prog_data->vue_map.num_slots; ++slot) {
+            dst_reg reg = dst_reg(MRF, mrf++);
+            reg.type = BRW_REGISTER_TYPE_F;
+
+            emit(MOV(reg, data));
+
+            /* XXX double-check that adding to offset will also alter the
+             * reladdr above, or if we need to create a fresh offset for each
+             * slot. */
+            emit(ADD(dst_reg(offset), offset, src_reg(1u)));
+
+            /* If this was max_usable_mrf, we can't fit anything more into this
+             * URB WRITE.
+             */
+            if (mrf > max_usable_mrf) {
+               slot++;
+               break;
+            }
+         }
+
+         complete = slot >= prog_data->vue_map.num_slots;
+         current_annotation = "URB write";
+         vec4_instruction *inst = emit_urb_write_opcode(complete);
+         inst->base_mrf = base_mrf;
+         inst->mlen = mrf - base_mrf;
+         if ((inst->mlen % 2) != 1)
+            inst->mlen++;
+         inst->offset += urb_offset;
+      } while (!complete);
+      emit(ADD(dst_reg(offset), offset, src_reg(1u))); /* skip flags entry */
+      emit(ADD(dst_reg(i), i, src_reg(1u)));
+   }
+   emit(BRW_OPCODE_WHILE);
+
+   /* XXX more thread ending logic here, perhaps similar to what
+    * vec4_gs_visitor::emit_thread_end does. Need to check the docs.
+    */
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
new file mode 100644
index 0000000..b519438
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2014 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file gen6_gs_visitor.h
+ *
+ * Gen6 geometry-shader-specific code, derived from the Gen7+ vec4_gs_visitor.
+ */ + +#ifndef GEN6_GS_VISITOR_H +#define GEN6_GS_VISITOR_H + +#include "brw_vec4.h" +#include "brw_vec4_gs_visitor.h" + +#ifdef __cplusplus +namespace brw { + +class gen6_gs_visitor : public vec4_gs_visitor { + public: + gen6_gs_visitor(struct brw_context *brw, + struct brw_gs_compile *c, + struct gl_shader_program *prog, + struct brw_shader *shader, + void *mem_ctx, + bool no_spills) : + vec4_gs_visitor(brw, c, prog, shader, mem_ctx, no_spills) {} + protected: + virtual void emit_prolog(); + virtual void emit_thread_end(); + virtual void visit(ir_emit_vertex *); + virtual void visit(ir_end_primitive *); + + private: + src_reg vertex_output; /* per vertex: vue_map.num_slots entries + 1 flags entry */ + src_reg vertex_output_offset; /* index of the next vertex_output entry to write */ + src_reg first_vertex; /* 2 if the next vertex starts a primitive, else 0 */ +}; + +} /* namespace brw */ +#endif /* __cplusplus */ + +#endif /* GEN6_GS_VISITOR_H */ -- 1.8.3.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev