Series looks good to me. Reviewed-by: Jose Fonseca <jfons...@vmware.com>
----- Original Message ----- > The specification says that the geometry shader should exit if the > number of emitted vertices is bigger or equal to max_output_vertices and > we can't do that because we're running in the SoA mode, which means that > our storing routines will keep getting called on channels that have > overflown (even though they will be masked out, but we just can't skip > them). > So we need some scratch area where we can keep writing the overflown > vertices without overwriting anything important or crashing. > --- > src/gallium/auxiliary/draw/draw_gs.c | 50 > +++++++++++++++++++---- > src/gallium/auxiliary/draw/draw_gs.h | 1 + > src/gallium/auxiliary/draw/draw_llvm.c | 6 +-- > src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 1 + > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 34 ++++++++++++++- > 5 files changed, 81 insertions(+), 11 deletions(-) > > diff --git a/src/gallium/auxiliary/draw/draw_gs.c > b/src/gallium/auxiliary/draw/draw_gs.c > index 5771aad..73917be 100644 > --- a/src/gallium/auxiliary/draw/draw_gs.c > +++ b/src/gallium/auxiliary/draw/draw_gs.c > @@ -280,6 +280,7 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader > *shader, > int max_prims_per_invocation = 0; > char *output_ptr = (char*)shader->gs_output; > int i, j, prim_idx; > + unsigned next_prim_boundary = shader->primitive_boundary; > > for (i = 0; i < shader->vector_length; ++i) { > int prims = shader->llvm_emitted_primitives[i]; > @@ -290,19 +291,42 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader > *shader, > total_verts += shader->llvm_emitted_vertices[i]; > } > > - > output_ptr += shader->emitted_vertices * shader->vertex_size; > for (i = 0; i < shader->vector_length - 1; ++i) { > int current_verts = shader->llvm_emitted_vertices[i]; > - > - if (current_verts != shader->max_output_vertices) { > - memcpy(output_ptr + (vertex_count + current_verts) * > shader->vertex_size, > - output_ptr + (vertex_count + shader->max_output_vertices) * > shader->vertex_size, > - shader->vertex_size * (total_verts - vertex_count)); > + int next_verts = shader->llvm_emitted_vertices[i + 1]; > +#if 0 > + int j; > + for (j = 0; j < current_verts; ++j) { > + struct vertex_header *vh = (struct vertex_header *) > + (output_ptr + shader->vertex_size * (i * next_prim_boundary + > j)); > + debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count, > + vh->data[0][0], vh->data[0][1], vh->data[0][2], > vh->data[0][3]); > + > + } > +#endif > + debug_assert(current_verts <= shader->max_output_vertices); > + debug_assert(next_verts <= shader->max_output_vertices); > + if (next_verts) { > + memmove(output_ptr + (vertex_count + current_verts) * > shader->vertex_size, > + output_ptr + ((i + 1) * next_prim_boundary) * > shader->vertex_size, > + shader->vertex_size * next_verts); > } > vertex_count += current_verts; > } > > +#if 0 > + { > + int i; > + for (i = 0; i < total_verts; ++i) { > + struct vertex_header *vh = (struct vertex_header *)(output_ptr + > shader->vertex_size * i); > + debug_printf("%d) [%f, %f, %f, %f]\n", i, > + vh->data[0][0], vh->data[0][1], vh->data[0][2], > vh->data[0][3]); > + > + } > + } > +#endif > + > prim_idx = 0; > for (i = 0; i < shader->vector_length; ++i) { > int num_prims = shader->llvm_emitted_primitives[i]; > @@ -513,10 +537,12 @@ int draw_geometry_shader_run(struct > draw_geometry_shader *shader, > > output_verts->vertex_size = vertex_size; > output_verts->stride = output_verts->vertex_size; > + /* we allocate exactly one extra vertex per primitive to allow the GS to > emit > + * overflown vertices into some area where they won't harm anyone */ > output_verts->verts = > (struct vertex_header *)MALLOC(output_verts->vertex_size * > max_out_prims * > - shader->max_output_vertices); > + shader->primitive_boundary); > > #if 0 > debug_printf("%s count = %d (in prims # = %d)\n", > @@ -723,6 +749,16 @@ draw_create_geometry_shader(struct draw_context *draw, > TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) > gs->max_output_vertices = gs->info.properties[i].data[0]; > } > + /* Primitive boundary is bigger than max_output_vertices by one, because > + * the specification says that the geometry shader should exit if the > + * number of emitted vertices is bigger or equal to max_output_vertices > and > + * we can't do that because we're running in the SoA mode, which means > that > + * our storing routines will keep getting called on channels that have > + * overflown. > + * So we need some scratch area where we can keep writing the overflown > + * vertices without overwriting anything important or crashing. > + */ > + gs->primitive_boundary = gs->max_output_vertices + 1; > > for (i = 0; i < gs->info.num_outputs; i++) { > if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && > diff --git a/src/gallium/auxiliary/draw/draw_gs.h > b/src/gallium/auxiliary/draw/draw_gs.h > index 7c84139..ca744ce 100644 > --- a/src/gallium/auxiliary/draw/draw_gs.h > +++ b/src/gallium/auxiliary/draw/draw_gs.h > @@ -68,6 +68,7 @@ struct draw_geometry_shader { > unsigned position_output; > > unsigned max_output_vertices; > + unsigned primitive_boundary; > unsigned input_primitive; > unsigned output_primitive; > > diff --git a/src/gallium/auxiliary/draw/draw_llvm.c > b/src/gallium/auxiliary/draw/draw_llvm.c > index 5100ce0..8ecbc88 100644 > --- a/src/gallium/auxiliary/draw/draw_llvm.c > +++ b/src/gallium/auxiliary/draw/draw_llvm.c > @@ -1287,8 +1287,8 @@ draw_gs_llvm_emit_vertex(const struct > lp_build_tgsi_gs_iface *gs_base, > LLVMValueRef clipmask = lp_build_const_int_vec(gallivm, > lp_int_type(gs_type), 0); > LLVMValueRef indices[LP_MAX_VECTOR_LENGTH]; > - LLVMValueRef max_output_vertices = > - lp_build_const_int32(gallivm, > variant->shader->base.max_output_vertices); > + LLVMValueRef next_prim_offset = > + lp_build_const_int32(gallivm, > variant->shader->base.primitive_boundary); > LLVMValueRef io = variant->io_ptr; > unsigned i; > const struct tgsi_shader_info *gs_info = &variant->shader->base.info; > @@ -1297,7 +1297,7 @@ draw_gs_llvm_emit_vertex(const struct > lp_build_tgsi_gs_iface *gs_base, > LLVMValueRef ind = lp_build_const_int32(gallivm, i); > LLVMValueRef currently_emitted = > LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, ""); > - indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, ""); > + indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, ""); > indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, ""); > } > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > index f1b1d79..0fbb8aa 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h > @@ -396,6 +396,7 @@ struct lp_build_tgsi_soa_context > LLVMValueRef emitted_prims_vec_ptr; > LLVMValueRef total_emitted_vertices_vec_ptr; > LLVMValueRef emitted_vertices_vec_ptr; > + LLVMValueRef max_output_vertices_vec; > > LLVMValueRef consts_ptr; > const LLVMValueRef *pos; > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index 299d7eb..7cff5fa 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -828,7 +828,6 @@ emit_fetch_gs_input( > vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); > } > > - > res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base, > vertex_index, attrib_index, > swizzle_index); > @@ -2257,6 +2256,20 @@ clear_uint_vec_ptr_from_mask(struct > lp_build_tgsi_context * bld_base, > LLVMBuildStore(builder, current_vec, ptr); > } > > +static LLVMValueRef > +clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld, > + LLVMValueRef current_mask_vec, > + LLVMValueRef total_emitted_vertices_vec) > +{ > + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; > + struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; > + LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, > + total_emitted_vertices_vec, > + bld->max_output_vertices_vec); > + > + return LLVMBuildAnd(builder, current_mask_vec, max_mask, ""); > +} > + > static void > emit_vertex( > const struct lp_build_tgsi_action * action, > @@ -2270,6 +2283,8 @@ emit_vertex( > LLVMValueRef masked_ones = mask_to_one_vec(bld_base); > LLVMValueRef total_emitted_vertices_vec = > LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); > + masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones, > + > total_emitted_vertices_vec); > gather_outputs(bld); > bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base, > bld->outputs, > @@ -2794,12 +2809,29 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, > bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; > > if (gs_iface) { > + /* There's no specific value for this because it should always > + * be set, but apps using ext_geometry_shader4 quite often > + * were forgetting so we're using MAX_VERTEX_VARYING from > + * that spec even though we could debug_assert if it's not > + * set, but that's a lot uglier. */ > + uint max_output_vertices = 32; > + uint i = 0; > /* inputs are always indirect with gs */ > bld.indirect_files |= (1 << TGSI_FILE_INPUT); > bld.gs_iface = gs_iface; > bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input; > bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex; > bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive; > + > + for (i = 0; i < info->num_properties; ++i) { > + if (info->properties[i].name == > + TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { > + max_output_vertices = info->properties[i].data[0]; > + } > + } > + bld.max_output_vertices_vec = > + lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type, > + max_output_vertices); > } > > lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base); > -- > 1.7.10.4 > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev