Am 27.05.2015 um 09:45 schrieb Dave Airlie: > This hooks up the geometry shader processing to the TGSI > support added in the previous commits. > > It doesn't change the llvm interface other than to > keep things building. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/gallium/auxiliary/draw/draw_gs.c | 195 > +++++++++++++-------- > src/gallium/auxiliary/draw/draw_gs.h | 21 ++- > src/gallium/auxiliary/draw/draw_pt.h | 1 + > .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 16 +- > .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 14 +- > src/gallium/auxiliary/draw/draw_pt_so_emit.c | 64 ++++--- > 6 files changed, 192 insertions(+), 119 deletions(-) > > diff --git a/src/gallium/auxiliary/draw/draw_gs.c > b/src/gallium/auxiliary/draw/draw_gs.c > index 755e527..9798518 100644 > --- a/src/gallium/auxiliary/draw/draw_gs.c > +++ b/src/gallium/auxiliary/draw/draw_gs.c > @@ -75,6 +75,7 @@ draw_gs_should_flush(struct draw_geometry_shader *shader) > /*#define DEBUG_OUTPUTS 1*/ > static void > tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, > + unsigned stream, > unsigned num_primitives, > float (**p_output)[4]) > { > @@ -89,14 +90,16 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, > */ > > for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { > - unsigned num_verts_per_prim = machine->Primitives[prim_idx]; > - shader->primitive_lengths[prim_idx + shader->emitted_primitives] = > - machine->Primitives[prim_idx]; > - shader->emitted_vertices += num_verts_per_prim; > + unsigned num_verts_per_prim = machine->Primitives[stream][prim_idx]; > + > + shader->stream[stream].primitive_lengths[prim_idx + > shader->stream[stream].emitted_primitives] = I'm not really trying to enforce strict 80-column lines, but over 100 is definitely too much. I know there are some other ridiculously long lines in some draw and tgsi code, but please try to avoid new ones.
> + machine->Primitives[stream][prim_idx]; > + shader->stream[stream].emitted_vertices += num_verts_per_prim; Did things actually still build/work around here before this patch (that is if you just applied patches up to 3/7)? Looks to me like it might not, if so it would be nice to get this fixed. > for (j = 0; j < num_verts_per_prim; j++, current_idx++) { > - int idx = current_idx * shader->info.num_outputs; > + int idx = machine->PrimitiveOffsets[stream][prim_idx] + current_idx > * shader->info.num_outputs; linelength > #ifdef DEBUG_OUTPUTS > - debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs); > + debug_printf("%d/%d) Output vert:\n", stream, idx / > shader->info.num_outputs); > #endif > for (slot = 0; slot < shader->info.num_outputs; slot++) { > output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0]; > @@ -115,7 +118,7 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, > } > } > *p_output = output; > - shader->emitted_primitives += num_primitives; > + shader->stream[stream].emitted_primitives += num_primitives; > } > > /*#define DEBUG_INPUTS 1*/ > @@ -201,11 +204,12 @@ static void tgsi_gs_prepare(struct draw_geometry_shader > *shader, > } > } > > -static unsigned tgsi_gs_run(struct draw_geometry_shader *shader, > - unsigned input_primitives) > +static void tgsi_gs_run(struct draw_geometry_shader *shader, > + unsigned input_primitives, > + unsigned *out_prims) whitespace? 
> { > struct tgsi_exec_machine *machine = shader->machine; > - > + int i; > tgsi_set_exec_mask(machine, > 1, > input_primitives > 1, > @@ -215,8 +219,30 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader > *shader, > /* run interpreter */ > tgsi_exec_machine_run(machine); > > - return > - > machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0]; > + for (i = 0; i < 4; i++) { > + int prim_i; > + int prim_c; > + switch (i) { > + case 0: > + prim_i = TGSI_EXEC_TEMP_PRIMITIVE_I; > + prim_c = TGSI_EXEC_TEMP_PRIMITIVE_C; > + break; > + case 1: > + prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S1_I; > + prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S1_C; > + break; > + case 2: > + prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S2_I; > + prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S2_C; > + break; > + case 3: > + prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S3_I; > + prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S3_C; > + break; > + }; > + > + out_prims[i] = machine->Temps[prim_i].xyzw[prim_c].u[0]; > + } > } > > #ifdef HAVE_LLVM > @@ -293,6 +319,7 @@ llvm_fetch_gs_input(struct draw_geometry_shader *shader, > > static void > llvm_fetch_gs_outputs(struct draw_geometry_shader *shader, > + unsigned stream, > unsigned num_primitives, > float (**p_output)[4]) > { > @@ -313,7 +340,7 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader, > total_verts += shader->llvm_emitted_vertices[i]; > } > > - output_ptr += shader->emitted_vertices * shader->vertex_size; > + output_ptr += shader->stream[0].emitted_vertices * shader->vertex_size; > for (i = 0; i < shader->vector_length - 1; ++i) { > int current_verts = shader->llvm_emitted_vertices[i]; > int next_verts = shader->llvm_emitted_vertices[i + 1]; > @@ -360,14 +387,14 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader > *shader, > for (j = 0; j < num_prims; ++j) { > int prim_length = > shader->llvm_prim_lengths[j][i]; > - shader->primitive_lengths[shader->emitted_primitives + prim_idx] = > + > 
shader->stream[0].primitive_lengths[shader->stream[0].emitted_primitives + > prim_idx] = > prim_length; > ++prim_idx; > } > } > > - shader->emitted_primitives += total_prims; > - shader->emitted_vertices += total_verts; > + shader->stream[0].emitted_primitives += total_prims; > + shader->stream[0].emitted_vertices += total_verts; > } > > static void > @@ -377,14 +404,14 @@ llvm_gs_prepare(struct draw_geometry_shader *shader, > { > } > > -static unsigned > +static void > llvm_gs_run(struct draw_geometry_shader *shader, > - unsigned input_primitives) > + unsigned input_primitives, unsigned *out_prims) > { > unsigned ret; > char *input = (char*)shader->gs_output; > > - input += (shader->emitted_vertices * shader->vertex_size); > + input += (shader->stream[0].emitted_vertices * shader->vertex_size); > > ret = shader->current_variant->jit_func( > shader->jit_context, shader->gs_input->data, > @@ -393,15 +420,15 @@ llvm_gs_run(struct draw_geometry_shader *shader, > shader->draw->instance_id, > shader->llvm_prim_ids); > > - return ret; > + *out_prims = ret; > } > > #endif > > static void gs_flush(struct draw_geometry_shader *shader) > { > - unsigned out_prim_count; > - > + unsigned out_prim_count[TGSI_MAX_VERTEX_STREAMS]; > + unsigned i; > unsigned input_primitives = shader->fetched_prim_count; > > if (shader->draw->collect_statistics) { > @@ -411,14 +438,19 @@ static void gs_flush(struct draw_geometry_shader > *shader) > debug_assert(input_primitives > 0 && > input_primitives <= 4); > > - out_prim_count = shader->run(shader, input_primitives); > - shader->fetch_outputs(shader, out_prim_count, > - &shader->tmp_output); > + shader->run(shader, input_primitives, out_prim_count); > + for (i = 0; i < shader->num_vertex_streams; i++) { > + shader->fetch_outputs(shader, i, out_prim_count[i], > + &shader->stream[i].tmp_output); > + } > > #if 0 > - debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", > - shader->emitted_primitives, shader->emitted_vertices, > 
- out_prim_count); > + for (i = 0; i < shader->num_vertex_streams; i++) { > + debug_printf("stream %d: PRIM emitted prims = %d (verts=%d), cur prim > count = %d\n", > + i, > + shader->stream[i].emitted_primitives, > shader->stream[i].emitted_vertices, > + out_prim_count[i]); > + } > #endif > > shader->fetched_prim_count = 0; > @@ -562,16 +594,19 @@ int draw_geometry_shader_run(struct > draw_geometry_shader *shader, > unsigned total_verts_per_buffer = shader->primitive_boundary * > num_in_primitives; > unsigned invocation; > + int i; > //Assume at least one primitive > max_out_prims = MAX2(max_out_prims, 1); > > - > - output_verts->vertex_size = vertex_size; > - output_verts->stride = output_verts->vertex_size; > - output_verts->verts = > - (struct vertex_header *)MALLOC(output_verts->vertex_size * > - total_verts_per_buffer * > shader->num_invocations); > - debug_assert(output_verts->verts); > + for (i = 0; i < shader->num_vertex_streams; i++) { > + /* write all the vertex data into all the streams */ > + output_verts[i].vertex_size = vertex_size; > + output_verts[i].stride = output_verts[i].vertex_size; > + output_verts[i].verts = > + (struct vertex_header *)MALLOC(output_verts[i].vertex_size * > + total_verts_per_buffer * > shader->num_invocations); > + debug_assert(output_verts[i].verts); > + } > > #if 0 > debug_printf("%s count = %d (in prims # = %d)\n", > @@ -589,21 +624,22 @@ int draw_geometry_shader_run(struct > draw_geometry_shader *shader, > total_verts_per_buffer); > #endif > > - shader->emitted_vertices = 0; > - shader->emitted_primitives = 0; > + for (i = 0; i < shader->num_vertex_streams; i++) { > + shader->stream[i].emitted_vertices = 0; > + shader->stream[i].emitted_primitives = 0; > + FREE(shader->stream[i].primitive_lengths); > + shader->stream[i].primitive_lengths = MALLOC(max_out_prims * > sizeof(unsigned) * shader->num_invocations); linelength > + shader->stream[i].tmp_output = (float > (*)[4])output_verts[i].verts->data; > + } > 
shader->vertex_size = vertex_size; > - shader->tmp_output = (float (*)[4])output_verts->verts->data; > shader->fetched_prim_count = 0; > shader->input_vertex_stride = input_stride; > shader->input = input; > shader->input_info = input_info; > - FREE(shader->primitive_lengths); > - shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * > shader->num_invocations); > - > > #ifdef HAVE_LLVM > if (shader->draw->llvm) { > - shader->gs_output = output_verts->verts; > + shader->gs_output = output_verts[0].verts; > if (max_out_prims > shader->max_out_prims) { > unsigned i; > if (shader->llvm_prim_lengths) { > @@ -651,32 +687,34 @@ int draw_geometry_shader_run(struct > draw_geometry_shader *shader, > > /* Update prim_info: > */ > - output_prims->linear = TRUE; > - output_prims->elts = NULL; > - output_prims->start = 0; > - output_prims->count = shader->emitted_vertices; > - output_prims->prim = shader->output_primitive; > - output_prims->flags = 0x0; > - output_prims->primitive_lengths = shader->primitive_lengths; > - output_prims->primitive_count = shader->emitted_primitives; > - output_verts->count = shader->emitted_vertices; > - > - if (shader->draw->collect_statistics) { > - unsigned i; > - for (i = 0; i < shader->emitted_primitives; ++i) { > - shader->draw->statistics.gs_primitives += > - u_decomposed_prims_for_vertices(shader->output_primitive, > - shader->primitive_lengths[i]); > + for (i = 0; i < shader->num_vertex_streams; i++) { > + output_prims[i].linear = TRUE; > + output_prims[i].elts = NULL; > + output_prims[i].start = 0; > + output_prims[i].count = shader->stream[i].emitted_vertices; > + output_prims[i].prim = shader->output_primitive; > + output_prims[i].flags = 0x0; > + output_prims[i].primitive_lengths = > shader->stream[i].primitive_lengths; > + output_prims[i].primitive_count = shader->stream[i].emitted_primitives; > + output_verts[i].count = shader->stream[i].emitted_vertices; > + > + if (shader->draw->collect_statistics) { > + unsigned i; 
> + for (i = 0; i < shader->stream[i].emitted_primitives; ++i) { > + shader->draw->statistics.gs_primitives += > + u_decomposed_prims_for_vertices(shader->output_primitive, > + > shader->stream[i].primitive_lengths[i]); > + } > } > } > > #if 0 > - debug_printf("GS finished, prims = %d, verts = %d\n", > - output_prims->primitive_count, > - output_verts->count); > + debug_printf("GS finished\n"); > + for (i = 0; i < 4; i++) > + debug_printf("stream %d: prims = %d verts = %d\n", i, > output_prims[i].primitive_count, output_verts[i].count); must be a new record linelength > #endif > > - return shader->emitted_vertices; > + return 0; > } > > void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, > @@ -695,16 +733,23 @@ boolean > draw_gs_init( struct draw_context *draw ) > { > if (!draw->llvm) { > + int i; > draw->gs.tgsi.machine = tgsi_exec_machine_create(); > if (!draw->gs.tgsi.machine) > return FALSE; > > - draw->gs.tgsi.machine->Primitives = align_malloc( > - MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); > - if (!draw->gs.tgsi.machine->Primitives) > - return FALSE; > - memset(draw->gs.tgsi.machine->Primitives, 0, > - MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); > + for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { > + draw->gs.tgsi.machine->Primitives[i] = align_malloc( > + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); > + draw->gs.tgsi.machine->PrimitiveOffsets[i] = align_malloc( > + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); > + if (!draw->gs.tgsi.machine->Primitives[i] || > !draw->gs.tgsi.machine->PrimitiveOffsets) > + return FALSE; > + memset(draw->gs.tgsi.machine->Primitives[i], 0, > + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); > + memset(draw->gs.tgsi.machine->PrimitiveOffsets[i], 0, > + MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); > + } > } > > return TRUE; > @@ -712,8 +757,10 @@ draw_gs_init( struct draw_context *draw ) > > void draw_gs_destroy( struct draw_context *draw ) > { > + int i; > if 
(draw->gs.tgsi.machine) { > - align_free(draw->gs.tgsi.machine->Primitives); > + for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) > + align_free(draw->gs.tgsi.machine->Primitives[i]); > tgsi_exec_machine_destroy(draw->gs.tgsi.machine); > } > } > @@ -817,6 +864,12 @@ draw_create_geometry_shader(struct draw_context *draw, > > gs->machine = draw->gs.tgsi.machine; > > + gs->num_vertex_streams = 1; > + for (i = 0; i < gs->state.stream_output.num_outputs; i++) { > + if (gs->state.stream_output.output[i].stream >= gs->num_vertex_streams) > + gs->num_vertex_streams = gs->state.stream_output.output[i].stream + > 1; > + } > + > #ifdef HAVE_LLVM > if (use_llvm) { > int vector_size = gs->vector_length * sizeof(float); > @@ -872,6 +925,7 @@ void draw_bind_geometry_shader(struct draw_context *draw, > void draw_delete_geometry_shader(struct draw_context *draw, > struct draw_geometry_shader *dgs) > { > + int i; > if (!dgs) { > return; > } > @@ -904,7 +958,8 @@ void draw_delete_geometry_shader(struct draw_context > *draw, > } > #endif > > - FREE(dgs->primitive_lengths); > + for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) > + FREE(dgs->stream[i].primitive_lengths); > FREE((void*) dgs->state.tokens); > FREE(dgs); > } > diff --git a/src/gallium/auxiliary/draw/draw_gs.h > b/src/gallium/auxiliary/draw/draw_gs.h > index 663ba84..c79c6d7 100644 > --- a/src/gallium/auxiliary/draw/draw_gs.h > +++ b/src/gallium/auxiliary/draw/draw_gs.h > @@ -56,6 +56,13 @@ struct draw_gs_inputs { > /** > * Private version of the compiled geometry shader > */ > +struct draw_vertex_stream { > + unsigned *primitive_lengths; > + unsigned emitted_vertices; > + unsigned emitted_primitives; > + float (*tmp_output)[4]; > +}; > + > struct draw_geometry_shader { > struct draw_context *draw; > > @@ -74,14 +81,11 @@ struct draw_geometry_shader { > unsigned primitive_boundary; > unsigned input_primitive; > unsigned output_primitive; > - > - unsigned *primitive_lengths; > - unsigned emitted_vertices; > - unsigned 
emitted_primitives; > - > - float (*tmp_output)[4]; > unsigned vertex_size; > > + struct draw_vertex_stream stream[TGSI_MAX_VERTEX_STREAMS]; > + unsigned num_vertex_streams; > + > unsigned in_prim_idx; > unsigned input_vertex_stride; > unsigned fetched_prim_count; > @@ -109,14 +113,15 @@ struct draw_geometry_shader { > unsigned num_vertices, > unsigned prim_idx); > void (*fetch_outputs)(struct draw_geometry_shader *shader, > + unsigned vertex_stream, > unsigned num_primitives, > float (**p_output)[4]); > > void (*prepare)(struct draw_geometry_shader *shader, > const void *constants[PIPE_MAX_CONSTANT_BUFFERS], > const unsigned > constants_size[PIPE_MAX_CONSTANT_BUFFERS]); > - unsigned (*run)(struct draw_geometry_shader *shader, > - unsigned input_primitives); > + void (*run)(struct draw_geometry_shader *shader, > + unsigned input_primitives, unsigned *out_prims); > }; > > void draw_geometry_shader_new_instance(struct draw_geometry_shader *gs); > diff --git a/src/gallium/auxiliary/draw/draw_pt.h > b/src/gallium/auxiliary/draw/draw_pt.h > index cb9a1b3..0052752 100644 > --- a/src/gallium/auxiliary/draw/draw_pt.h > +++ b/src/gallium/auxiliary/draw/draw_pt.h > @@ -188,6 +188,7 @@ struct pt_so_emit; > void draw_pt_so_emit_prepare(struct pt_so_emit *emit, boolean > use_pre_clip_pos); > > void draw_pt_so_emit( struct pt_so_emit *emit, > + int num_vertex_streams, > const struct draw_vertex_info *vert_info, > const struct draw_prim_info *prim_info ); > > diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c > b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c > index 5af845f..5c74455 100644 > --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c > +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c > @@ -235,16 +235,17 @@ fetch_pipeline_generic(struct draw_pt_middle_end > *middle, > struct draw_context *draw = fpme->draw; > struct draw_vertex_shader *vshader = draw->vs.vertex_shader; > struct draw_geometry_shader *gshader = 
draw->gs.geometry_shader; > - struct draw_prim_info gs_prim_info; > + struct draw_prim_info gs_prim_info[TGSI_MAX_VERTEX_STREAMS]; > struct draw_vertex_info fetched_vert_info; > struct draw_vertex_info vs_vert_info; > - struct draw_vertex_info gs_vert_info; > + struct draw_vertex_info gs_vert_info[TGSI_MAX_VERTEX_STREAMS]; > struct draw_vertex_info *vert_info; > struct draw_prim_info ia_prim_info; > struct draw_vertex_info ia_vert_info; > const struct draw_prim_info *prim_info = in_prim_info; > boolean free_prim_info = FALSE; > unsigned opt = fpme->opt; > + int num_vertex_streams = 1; > > fetched_vert_info.count = fetch_info->count; > fetched_vert_info.vertex_size = fpme->vertex_size; > @@ -293,12 +294,13 @@ fetch_pipeline_generic(struct draw_pt_middle_end > *middle, > vert_info, > prim_info, > &vshader->info, > - &gs_vert_info, > - &gs_prim_info); > + gs_vert_info, > + gs_prim_info); > > FREE(vert_info->verts); > - vert_info = &gs_vert_info; > - prim_info = &gs_prim_info; > + vert_info = &gs_vert_info[0]; > + prim_info = &gs_prim_info[0]; > + num_vertex_streams = TGSI_MAX_VERTEX_STREAMS; > } else { > if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) { > draw_prim_assembler_run(draw, prim_info, vert_info, > @@ -328,7 +330,7 @@ fetch_pipeline_generic(struct draw_pt_middle_end *middle, > * XXX: Stream output surely needs to respect the prim_info->elt > * lists. 
> */ > - draw_pt_so_emit( fpme->so_emit, vert_info, prim_info ); > + draw_pt_so_emit( fpme->so_emit, num_vertex_streams, vert_info, prim_info > ); > > draw_stats_clipper_primitives(draw, prim_info); > > diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c > b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c > index d17d695..31097e0 100644 > --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c > +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c > @@ -344,9 +344,9 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle, > struct llvm_middle_end *fpme = llvm_middle_end(middle); > struct draw_context *draw = fpme->draw; > struct draw_geometry_shader *gshader = draw->gs.geometry_shader; > - struct draw_prim_info gs_prim_info; > + struct draw_prim_info gs_prim_info[TGSI_MAX_VERTEX_STREAMS]; > struct draw_vertex_info llvm_vert_info; > - struct draw_vertex_info gs_vert_info; > + struct draw_vertex_info gs_vert_info[TGSI_MAX_VERTEX_STREAMS]; > struct draw_vertex_info *vert_info; > struct draw_prim_info ia_prim_info; > struct draw_vertex_info ia_vert_info; > @@ -410,12 +410,12 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle, > vert_info, > prim_info, > &vshader->info, > - &gs_vert_info, > - &gs_prim_info); > + gs_vert_info, > + gs_prim_info); > > FREE(vert_info->verts); > - vert_info = &gs_vert_info; > - prim_info = &gs_prim_info; > + vert_info = &gs_vert_info[0]; > + prim_info = &gs_prim_info[0]; > } else { > if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) { > draw_prim_assembler_run(draw, prim_info, vert_info, > @@ -440,7 +440,7 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle, > } > > /* stream output needs to be done before clipping */ > - draw_pt_so_emit( fpme->so_emit, vert_info, prim_info ); > + draw_pt_so_emit( fpme->so_emit, 1, vert_info, prim_info ); > > draw_stats_clipper_primitives(draw, prim_info); > > diff --git 
a/src/gallium/auxiliary/draw/draw_pt_so_emit.c > b/src/gallium/auxiliary/draw/draw_pt_so_emit.c > index 581e2d6..08121a3 100644 > --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c > +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c > @@ -49,6 +49,7 @@ struct pt_so_emit { > int pos_idx; > unsigned emitted_primitives; > unsigned generated_primitives; > + unsigned stream; > }; > > static const struct pipe_stream_output_info * > @@ -144,6 +145,9 @@ static void so_emit_prim(struct pt_so_emit *so, > int ob = state->output[slot].output_buffer; > unsigned dst_offset = state->output[slot].dst_offset * > sizeof(float); > unsigned write_size = num_comps * sizeof(float); > + > + if (state->output[slot].stream != so->stream) > + continue; > /* If a buffer is missing then that's equivalent to > * an overflow */ > if (!draw->so.targets[ob]) { > @@ -175,7 +179,10 @@ static void so_emit_prim(struct pt_so_emit *so, > unsigned idx = state->output[slot].register_index; > unsigned start_comp = state->output[slot].start_component; > unsigned num_comps = state->output[slot].num_components; > + unsigned stream = state->output[slot].stream; > > + if (stream != so->stream) > + continue; > ob = state->output[slot].output_buffer; > buffer_written[ob] = TRUE; > > @@ -184,7 +191,7 @@ static void so_emit_prim(struct pt_so_emit *so, > draw->so.targets[ob]->internal_offset) + > state->output[slot].dst_offset; > > - if (idx == so->pos_idx && pcp_ptr) > + if (idx == so->pos_idx && pcp_ptr && so->stream == 0) > memcpy(buffer, &pre_clip_pos[start_comp], > num_comps * sizeof(float)); > else > @@ -193,8 +200,8 @@ static void so_emit_prim(struct pt_so_emit *so, > #if 0 > { > int j; > - debug_printf("VERT[%d], offset = %d, slot[%d] sc = %d, num_c = > %d, idx = %d = [", > - i, > + debug_printf("VERT[%d], stream = %d, offset = %d, slot[%d] sc = > %d, num_c = %d, idx = %d = [", > + i, stream, > draw->so.targets[ob]->internal_offset, > slot, start_comp, num_comps, idx); > for (j = 0; j < num_comps; ++j) { > 
@@ -258,12 +265,13 @@ static void so_tri(struct pt_so_emit *so, int i0, int > i1, int i2) > > > void draw_pt_so_emit( struct pt_so_emit *emit, > + int num_vertex_streams, > const struct draw_vertex_info *input_verts, > const struct draw_prim_info *input_prims ) > { > struct draw_context *draw = emit->draw; > struct vbuf_render *render = draw->render; > - unsigned start, i; > + unsigned start, i, stream; > > if (!emit->has_so) > return; > @@ -271,34 +279,36 @@ void draw_pt_so_emit( struct pt_so_emit *emit, > if (!draw->so.num_targets) > return; > > - emit->emitted_primitives = 0; > - emit->generated_primitives = 0; > - emit->input_vertex_stride = input_verts->stride; > - if (emit->use_pre_clip_pos) > - emit->pre_clip_pos = input_verts->verts->pre_clip_pos; > - > - emit->inputs = (const float (*)[4])input_verts->verts->data; > - > /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/ > draw_do_flush( draw, DRAW_FLUSH_BACKEND ); > > - for (start = i = 0; i < input_prims->primitive_count; > - start += input_prims->primitive_lengths[i], i++) > - { > - unsigned count = input_prims->primitive_lengths[i]; > - > - if (input_prims->linear) { > - so_run_linear(emit, input_prims, input_verts, > - start, count); > - } else { > - so_run_elts(emit, input_prims, input_verts, > - start, count); > + for (stream = 0; stream < num_vertex_streams; stream++) { > + emit->emitted_primitives = 0; > + emit->generated_primitives = 0; > + if (emit->use_pre_clip_pos) > + emit->pre_clip_pos = input_verts[stream].verts->pre_clip_pos; > + > + emit->input_vertex_stride = input_verts[stream].stride; > + emit->inputs = (const float (*)[4])input_verts[stream].verts->data; > + emit->stream = stream; > + for (start = i = 0; i < input_prims[stream].primitive_count; > + start += input_prims[stream].primitive_lengths[i], i++) > + { > + unsigned count = input_prims[stream].primitive_lengths[i]; > + > + if (input_prims->linear) { > + so_run_linear(emit, &input_prims[stream], 
&input_verts[stream], > + start, count); > + } else { > + so_run_elts(emit, &input_prims[stream], &input_verts[stream], > + start, count); > + } > } > + render->set_stream_output_info(render, > + stream, > + emit->emitted_primitives, > + emit->generated_primitives); > } > - > - render->set_stream_output_info(render, 0, > - emit->emitted_primitives, > - emit->generated_primitives); > } > > > Overall, this doesn't look particularly elegant, but I can't think of anything better; it's probably inherent to how multiple streams work. So, other than the things I mentioned, for the series: Reviewed-by: Roland Scheidegger <srol...@vmware.com> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev