Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> > On Nov 29, 2016, at 8:18 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote: > > We need to take the instance divisor and number of instances into > account for instanced client-side arrays, rather than the vertex > parameters. > > Loosely based on the comparable nvc0 logic. > > Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> > --- > src/gallium/drivers/swr/swr_state.cpp | 74 +++++++++++++++++++++++------------ > src/gallium/drivers/swr/swr_state.h | 2 + > 2 files changed, 52 insertions(+), 24 deletions(-) > > diff --git a/src/gallium/drivers/swr/swr_state.cpp > b/src/gallium/drivers/swr/swr_state.cpp > index f1cf4ae..9f6b5b0 100644 > --- a/src/gallium/drivers/swr/swr_state.cpp > +++ b/src/gallium/drivers/swr/swr_state.cpp > @@ -494,6 +494,15 @@ swr_create_vertex_elements_state(struct pipe_context > *pipe, > const SWR_FORMAT_INFO &swr_desc = GetFormatInfo( > mesa_to_swr_format(attribs[i].src_format)); > velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp; > + > + if (attribs[i].instance_divisor != 0) { > + velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index; > + uint32_t *min_instance_div = > + &velems->min_instance_div[attribs[i].vertex_buffer_index]; > + if (!*min_instance_div || > + attribs[i].instance_divisor < *min_instance_div) > + *min_instance_div = attribs[i].instance_divisor; > + } > } > } > > @@ -870,6 +879,34 @@ swr_change_rt(struct swr_context *ctx, > return need_fence; > } > > +static inline void > +swr_user_vbuf_range(const struct pipe_draw_info *info, > + const struct swr_vertex_element_state *velems, > + const struct pipe_vertex_buffer *vb, > + uint32_t i, > + uint32_t *totelems, > + uint32_t *base, > + uint32_t *size) > +{ > + /* FIXME: The size is too large - we don't access the full extra stride. > */ > + unsigned elems; > + if (velems->instanced_bufs & (1U << i)) { > + elems = info->instance_count / velems->min_instance_div[i] + 1; > + *totelems = info->start_instance + elems; > + *base = info->start_instance * vb->stride; > + *size = elems * vb->stride; > + } else if (vb->stride) { > + elems = info->max_index - info->min_index + 1; > + *totelems = info->max_index + 1; > + *base = info->min_index * vb->stride; > + *size = elems * vb->stride; > + } else { > + *totelems = 1; > + *base = 0; > + *size = velems->stream_pitch[i]; > + } > +} > + > void > swr_update_derived(struct pipe_context *pipe, > const struct pipe_draw_info *p_draw_info) > @@ -1039,8 +1076,7 @@ swr_update_derived(struct pipe_context *pipe, > /* Set vertex & index buffers */ > /* (using draw info if called by swr_draw_vbo) */ > if (ctx->dirty & SWR_NEW_VERTEX) { > - uint32_t size, pitch, max_vertex, partial_inbounds, scratch_total; > - const uint8_t *p_data; > + uint32_t scratch_total; > uint8_t *scratch = NULL; > > /* If being called by swr_draw_vbo, copy draw details */ > @@ -1056,14 +1092,8 @@ swr_update_derived(struct pipe_context *pipe, > if (!vb->user_buffer) > continue; > > - if (vb->stride) { > - size = (info.max_index - info.min_index + 1) * vb->stride; > - } else { > - /* pitch = 0, means constant value > - * set size to 1 vertex */ > - size = ctx->velems->stream_pitch[i]; > - } > - > + uint32_t elems, base, size; > + swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, > &size); > scratch_total += AlignUp(size, 4); > } > > @@ -1075,6 +1105,8 @@ swr_update_derived(struct pipe_context *pipe, > /* vertex buffers */ > SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS]; > for (UINT i = 0; i < ctx->num_vertex_buffers; i++) { > + uint32_t size, pitch, elems, partial_inbounds; > + const uint8_t *p_data; > struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i]; > > pitch = vb->stride; > @@ -1083,7 +1115,7 @@ swr_update_derived(struct pipe_context *pipe, > * size is based on buffer->width0 rather than info.max_index > * to prevent having to validate VBO on each draw */ > size = vb->buffer->width0; > - max_vertex = size / pitch; > + elems = size / pitch; > partial_inbounds = size % pitch; > > p_data = swr_resource_data(vb->buffer) + vb->buffer_offset; > @@ -1093,25 +1125,17 @@ swr_update_derived(struct pipe_context *pipe, > * revalidate on each draw */ > post_update_dirty_flags |= SWR_NEW_VERTEX; > > - if (pitch) { > - size = (info.max_index - info.min_index + 1) * pitch; > - } else { > - /* pitch = 0, means constant value > - * set size to 1 vertex */ > - size = ctx->velems->stream_pitch[i]; > - } > - > - max_vertex = info.max_index + 1; > + uint32_t base; > + swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, > &size); > partial_inbounds = 0; > > /* Copy only needed vertices to scratch space */ > size = AlignUp(size, 4); > - const void *ptr = (const uint8_t *) vb->user_buffer > - + info.min_index * pitch; > + const void *ptr = (const uint8_t *) vb->user_buffer + base; > memcpy(scratch, ptr, size); > ptr = scratch; > scratch += size; > - p_data = (const uint8_t *)ptr - info.min_index * pitch; > + p_data = (const uint8_t *)ptr - base; > } > > swrVertexBuffers[i] = {0}; > @@ -1119,7 +1143,7 @@ swr_update_derived(struct pipe_context *pipe, > swrVertexBuffers[i].pitch = pitch; > swrVertexBuffers[i].pData = p_data; > swrVertexBuffers[i].size = size; > - swrVertexBuffers[i].maxVertex = max_vertex; > + swrVertexBuffers[i].maxVertex = elems; > swrVertexBuffers[i].partialInboundsSize = partial_inbounds; > } > > @@ -1129,6 +1153,8 @@ swr_update_derived(struct pipe_context *pipe, > /* index buffer, if required (info passed in by swr_draw_vbo) */ > SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */ > if (info.indexed) { > + const uint8_t *p_data; > + uint32_t size, pitch; > struct pipe_index_buffer *ib = &ctx->index_buffer; > > pitch = ib->index_size ? ib->index_size : sizeof(uint32_t); > diff --git a/src/gallium/drivers/swr/swr_state.h > b/src/gallium/drivers/swr/swr_state.h > index 17d4873..1960abd 100644 > --- a/src/gallium/drivers/swr/swr_state.h > +++ b/src/gallium/drivers/swr/swr_state.h > @@ -72,6 +72,8 @@ struct swr_vertex_element_state { > FETCH_COMPILE_STATE fsState; > PFN_FETCH_FUNC fsFunc; > uint32_t stream_pitch[PIPE_MAX_ATTRIBS]; > + uint32_t min_instance_div[PIPE_MAX_ATTRIBS]; > + uint32_t instanced_bufs; > }; > > struct swr_blend_state { > -- > 2.7.3 >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev