----- Original Message ----- > The issue with SOA execution and the end_primitive opcode is that it > can be executed both when we haven't emitted any vertices, in > which case we don't want to emit an empty primitive, and when > the execution mask is zero and the execution should be skipped. We > handled only the latter of those conditions. Now we're combining the > execution mask with a mask created from emitted vertices to handle > both cases. As a result, we don't need the pending_end_primitive > flag, which was broken because it was static and could be affected > by both of the above-mentioned conditions at run-time. > > Signed-off-by: Zack Rusin <za...@vmware.com> > --- > src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 6 -- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 71 > ++++++++++++++++------- > 2 files changed, 50 insertions(+), 27 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > index 23ccacc..f1b1d79 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > @@ -396,12 +396,6 @@ struct lp_build_tgsi_soa_context > LLVMValueRef emitted_prims_vec_ptr; > LLVMValueRef total_emitted_vertices_vec_ptr; > LLVMValueRef emitted_vertices_vec_ptr; > - /* if a shader doesn't have ENDPRIM instruction but it has > - * a number of EMIT instructions it means the END instruction > - * implicitly invokes ENDPRIM. 
handle this via a flag here > - * in the future maybe we can enforce TGSI to always have > - * an explicit ENDPRIM */ > - boolean pending_end_primitive; > > LLVMValueRef consts_ptr; > const LLVMValueRef *pos; > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index 8a29635..b54fc13 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -65,6 +65,7 @@ > #include "lp_bld_sample.h" > #include "lp_bld_struct.h" > > +/*#define DUMP_GS_EMITS 1*/
#define DUMP_GS_EMITS 0 > > static void lp_exec_mask_init(struct lp_exec_mask *mask, struct > lp_build_context *bld) > { > @@ -2278,27 +2279,25 @@ emit_vertex( > increment_vec_ptr_by_mask(bld_base, > bld->total_emitted_vertices_vec_ptr, > masked_ones); > #if DUMP_GS_EMITS > - lp_build_print_value(bld->bld_base.base.gallivm, " +++ emit vertex > masked ones = ", > + lp_build_print_value(bld->bld_base.base.gallivm, > + " +++ emit vertex masked ones = ", > masked_ones); > - lp_build_print_value(bld->bld_base.base.gallivm, " +++ emit vertex > emitted = ", > + lp_build_print_value(bld->bld_base.base.gallivm, > + " +++ emit vertex emitted = ", > total_emitted_vertices_vec); > #endif > - bld->pending_end_primitive = TRUE; > } > } > > > static void > -end_primitive( > - const struct lp_build_tgsi_action * action, > - struct lp_build_tgsi_context * bld_base, > - struct lp_build_emit_data * emit_data) > +end_primitive_masked(struct lp_build_tgsi_context * bld_base, > + LLVMValueRef masked_ones) > { > struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); > LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; > > if (bld->gs_iface->end_primitive) { > - LLVMValueRef masked_ones = mask_to_one_vec(bld_base); > LLVMValueRef emitted_vertices_vec = > LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""); > LLVMValueRef emitted_prims_vec = > @@ -2309,23 +2308,55 @@ end_primitive( > emitted_prims_vec); > > #if DUMP_GS_EMITS > - lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim masked > ones = ", > + lp_build_print_value(bld->bld_base.base.gallivm, > + " +++ end prim masked ones = ", > masked_ones); > - lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim > emitted verts1 = ", > + lp_build_print_value(bld->bld_base.base.gallivm, > + " +++ end prim emitted verts1 = ", > emitted_vertices_vec); > - lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim > emitted prims1 = ", > - LLVMBuildLoad(builder, > bld->emitted_prims_vec_ptr, 
"")); > + lp_build_print_value(bld->bld_base.base.gallivm, > + " +++ end prim emitted prims1 = ", > + LLVMBuildLoad(builder, > + bld->emitted_prims_vec_ptr, "")); > #endif > increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr, > masked_ones); > clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr, > masked_ones); > #if DUMP_GS_EMITS > - lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim > emitted verts2 = ", > - LLVMBuildLoad(builder, > bld->emitted_vertices_vec_ptr, "")); > + lp_build_print_value(bld->bld_base.base.gallivm, > + " +++ end prim emitted verts2 = ", > + LLVMBuildLoad(builder, > + bld->emitted_vertices_vec_ptr, > "")); > #endif > + } > > - bld->pending_end_primitive = FALSE; > +} > + > +static void > +end_primitive( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); > + > + if (bld->gs_iface->end_primitive) { > + LLVMBuilderRef builder = bld_base->base.gallivm->builder; > + LLVMValueRef masked_ones = mask_to_one_vec(bld_base); > + struct lp_build_context *uint_bld = &bld_base->uint_bld; > + LLVMValueRef emitted_verts = LLVMBuildLoad( > + builder, bld->emitted_vertices_vec_ptr, ""); > + LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, > + emitted_verts, > + uint_bld->zero); > + /* We need to combine the current execution mask with the mask > + telling us which, if any, execution slots actually have > + unemitted primitives, this way we make sure that end_primitives > + executes only on the paths that have unflushed vertices */ > + masked_ones = LLVMBuildAnd(builder, masked_ones, emitted_mask, ""); > + > + end_primitive_masked(bld_base, masked_ones); > } > } > > @@ -2653,11 +2684,10 @@ static void emit_epilogue(struct > lp_build_tgsi_context * bld_base) > if (bld->gs_iface) { > LLVMValueRef total_emitted_vertices_vec; > LLVMValueRef 
emitted_prims_vec; > - /* flush the accumulated vertices as a primitive */ > - if (bld->pending_end_primitive) { > - end_primitive(NULL, bld_base, NULL); > - bld->pending_end_primitive = FALSE; > - } > + /* implicit end_primitives, needed in case there are any unflushed > + vertices in the cache */ > + end_primitive(NULL, bld_base, NULL); > + > total_emitted_vertices_vec = > LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); > emitted_prims_vec = > @@ -2767,7 +2797,6 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, > /* inputs are always indirect with gs */ > bld.indirect_files |= (1 << TGSI_FILE_INPUT); > bld.gs_iface = gs_iface; > - bld.pending_end_primitive = FALSE; > bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input; > bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex; > bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive; > -- > 1.7.10.4 > > Looks good to me. Reviewed-by: Jose Fonseca <jfons...@vmware.com> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev