We can not use the VUE Dereference flags combination for EOT message under ILK and SNB because the threads are not initialized there with initial VUE handle unlike Pre-IL. So to avoid GPU hangs on SNB and ILK we need to avoid usage of the VUE Dereference flags combination. (Was tested only on SNB but according to the specification SNB Volume 2 Part 1: 1.6.5.3, 1.6.5.6 the ILK must behave itself in the similar way)
v2: Approach to fix this issue was changed. Instead of different EOT flags in the program end we will create VUE every time even if GS produces no output. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105399 Signed-off-by: Andrii Simiklit <andrii.simik...@globallogic.com> --- src/intel/compiler/gen6_gs_visitor.cpp | 88 +++++++++------------------------- 1 file changed, 23 insertions(+), 65 deletions(-) diff --git a/src/intel/compiler/gen6_gs_visitor.cpp b/src/intel/compiler/gen6_gs_visitor.cpp index ac3ba55..b831d33 100644 --- a/src/intel/compiler/gen6_gs_visitor.cpp +++ b/src/intel/compiler/gen6_gs_visitor.cpp @@ -300,11 +300,10 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf, /* Otherwise we always request to allocate a new VUE handle. If this is * the last write before the EOT message and the new handle never gets * used it will be dereferenced when we send the EOT message. This is - * necessary to avoid different setups (under Pre-IL only) for the EOT message (one for the + * necessary to avoid different setups for the EOT message (one for the * case when there is no output and another for the case when there is) * which would require to end the program with an IF/ELSE/ENDIF block, - * something we do not want. - * But for ILK and SNB we can not avoid the end the program with an IF/ELSE/ENDIF block. + * something we do not want. */ inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE); inst->urb_write_flags = BRW_URB_WRITE_COMPLETE; @@ -351,27 +350,27 @@ gen6_gs_visitor::emit_thread_end() int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen); /* Issue the FF_SYNC message and obtain the initial VUE handle. */ - emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_G)); - emit(IF(BRW_PREDICATE_NORMAL)); - { - this->current_annotation = "gen6 thread end: ff_sync"; + this->current_annotation = "gen6 thread end: ff_sync"; - vec4_instruction *inst; - if (prog->info.has_transform_feedback_varyings) { + vec4_instruction *inst = NULL; + if (prog->info.has_transform_feedback_varyings) { src_reg sol_temp(this, glsl_type::uvec4_type); emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES, - dst_reg(this->svbi), - this->vertex_count, - this->prim_count, - sol_temp); + dst_reg(this->svbi), + this->vertex_count, + this->prim_count, + sol_temp); inst = emit(GS_OPCODE_FF_SYNC, dst_reg(this->temp), this->prim_count, this->svbi); - } else { + } else { inst = emit(GS_OPCODE_FF_SYNC, dst_reg(this->temp), this->prim_count, brw_imm_ud(0u)); - } - inst->base_mrf = base_mrf; + } + inst->base_mrf = base_mrf; + emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_G)); + emit(IF(BRW_PREDICATE_NORMAL)); + { /* Loop over all buffered vertices and emit URB write messages */ this->current_annotation = "gen6 thread end: urb writes init"; src_reg vertex(this, glsl_type::uint_type); @@ -415,7 +414,7 @@ gen6_gs_visitor::emit_thread_end() dst_reg reg = dst_reg(MRF, mrf); reg.type = output_reg[varying][0].type; data.type = reg.type; - vec4_instruction *inst = emit(MOV(reg, data)); + inst = emit(MOV(reg, data)); inst->force_writemask_all = true; mrf++; @@ -450,11 +449,8 @@ gen6_gs_visitor::emit_thread_end() if (prog->info.has_transform_feedback_varyings) xfb_write(); } - const bool common_eot_approach_can_be_used = (devinfo->gen < 5); - if(common_eot_approach_can_be_used) - { - emit(BRW_OPCODE_ENDIF); - } + emit(BRW_OPCODE_ENDIF); + /* Finally, emit EOT message. * * In gen6 we need to end the thread differently depending on whether we have @@ -464,35 +460,11 @@ gen6_gs_visitor::emit_thread_end() * * However, this would lead us to end the program with an ENDIF opcode, * which we want to avoid, so what we do is that we always request a new - * VUE handle every time we do a URB WRITE, even for the last vertex we emit. + * VUE handle every time, even if GS produces no output. * With this we make sure that whether we have emitted at least one vertex * or none at all, we have to finish the thread without writing to the URB, - * which works for both cases (but only under Pre-IL) by setting - * the COMPLETE and UNUSED flags in the EOT message. - * - * But under ILK or SNB we must not use combination COMPLETE and UNUSED - * because this combination could be used only for already allocated VUE. - * But unlike Pre-IL in the ILK and SNB - * the initial VUE is not passed to threads. - * This behaver mentioned in specification: - * SNB Volume 2 Part 1: - * "1.6.5.3 VUE Allocation (GS, CLIP) [DevIL]" - * "1.6.5.4 VUE Allocation (GS) [DevSNB+]" - * "The threads are not passed an initial handle. - * Instead, they request a first handle (if any) - * via the URB shared function’s FF_SYNC message (see Shared Functions). - * If additional handles are required, - * the URB_WRITE allocate mechanism (mentioned above) is used." - * - * So for ILK and for SNB we must use only UNUSED flag. - * This is accepteble combination according to: - * SNB Volume 4 Part 2: - * "2.4.2 Message Descriptor" - * "Table lists the valid and invalid combinations of - * the Complete, Used, Allocate and EOT bits" - * "Thread terminate non-write of URB" - * SNB Volume 2 Part 1: - * "1.6.5.6 Thread Termination" + * which works for both cases by setting the COMPLETE and UNUSED flags in + * the EOT message. */ this->current_annotation = "gen6 thread end: EOT"; @@ -504,26 +476,12 @@ gen6_gs_visitor::emit_thread_end() emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data); } - vec4_instruction *inst = emit(GS_OPCODE_THREAD_END); + inst = emit(GS_OPCODE_THREAD_END); inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED; inst->base_mrf = base_mrf; inst->mlen = 1; - - if(!common_eot_approach_can_be_used) - { - emit(BRW_OPCODE_ELSE); - - this->current_annotation = "gen6 thread end: EOT"; - - vec4_instruction *unused_urb_inst = emit(GS_OPCODE_THREAD_END); - unused_urb_inst->urb_write_flags = BRW_URB_WRITE_UNUSED; - unused_urb_inst->base_mrf = base_mrf; - unused_urb_inst->mlen = 1; - - emit(BRW_OPCODE_ENDIF); - } } - + void gen6_gs_visitor::setup_payload() { -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev