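From: Marek Olšák <marek.ol...@amd.com> Compute first_vb_use_mask once in si_create_vertex_elements (one bit per vertex element, set when that element is the first to reference its vertex buffer) and test it in si_upload_vertex_buffer_descriptors instead of rebuilding a local bound[] array on every descriptor upload. v2: now it should be correct --- src/gallium/drivers/radeonsi/si_descriptors.c | 8 ++++---- src/gallium/drivers/radeonsi/si_state.c | 6 ++++++ src/gallium/drivers/radeonsi/si_state.h | 1 + 3 files changed, 11 insertions(+), 4 deletions(-)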
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 837f393..391c58b 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -932,29 +932,29 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); } bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) { struct si_vertex_element *velems = sctx->vertex_elements; struct si_descriptors *desc = &sctx->vertex_buffers; - bool bound[SI_NUM_VERTEX_BUFFERS] = {}; unsigned i, count = velems->count; uint64_t va; uint32_t *ptr; if (!sctx->vertex_buffers_dirty || !count || !velems) return true; unsigned fix_size3 = velems->fix_size3; + unsigned first_vb_use_mask = velems->first_vb_use_mask; /* Vertex buffer descriptors are the only ones which are uploaded * directly through a staging buffer and don't go through * the fine-grained upload path. 
*/ u_upload_alloc(sctx->b.uploader, 0, count * 16, 256, &desc->buffer_offset, (struct pipe_resource**)&desc->buffer, (void**)&ptr); if (!desc->buffer) return false; @@ -962,23 +962,24 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) desc->buffer, RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); assert(count <= SI_NUM_VERTEX_BUFFERS); for (i = 0; i < count; i++) { struct pipe_vertex_element *ve = &velems->elements[i]; struct pipe_vertex_buffer *vb; struct r600_resource *rbuffer; unsigned offset; + unsigned vbo_index = ve->vertex_buffer_index; uint32_t *desc = &ptr[i*4]; - vb = &sctx->vertex_buffer[ve->vertex_buffer_index]; + vb = &sctx->vertex_buffer[vbo_index]; rbuffer = (struct r600_resource*)vb->buffer; if (!rbuffer) { memset(desc, 0, 16); continue; } offset = vb->buffer_offset + ve->src_offset; va = rbuffer->gpu_address + offset; /* Fill in T# buffer resource description */ @@ -1011,25 +1012,24 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) size3 = (fix_size3 >> (2 * i)) & 3; if (vb->stride && size3) { assert(offset % 4 == 0 && vb->stride % 4 == 0); assert(size3 <= 2); desc[2] = align(desc[2], size3 * 2); } } desc[3] = velems->rsrc_word3[i]; - if (!bound[ve->vertex_buffer_index]) { + if (first_vb_use_mask & (1 << i)) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, (struct r600_resource*)vb->buffer, RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); - bound[ve->vertex_buffer_index] = true; } } /* Don't flush the const cache. It would have a very negative effect * on performance (confirmed by testing). New descriptors are always * uploaded to a fresh new buffer, so I don't think flushing the const * cache is needed. 
*/ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); sctx->vertex_buffers_dirty = false; sctx->vertex_buffer_pointer_dirty = true; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3022260..f60a499 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3316,39 +3316,45 @@ static void si_delete_sampler_state(struct pipe_context *ctx, void *state) /* * Vertex elements & buffers */ static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements) { struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); + bool used[SI_NUM_VERTEX_BUFFERS] = {}; int i; assert(count <= SI_MAX_ATTRIBS); if (!v) return NULL; v->count = count; for (i = 0; i < count; ++i) { const struct util_format_description *desc; const struct util_format_channel_description *channel; unsigned data_format, num_format; int first_non_void; unsigned vbo_index = elements[i].vertex_buffer_index; if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { FREE(v); return NULL; } + if (!used[vbo_index]) { + v->first_vb_use_mask |= 1 << i; + used[vbo_index] = true; + } + desc = util_format_description(elements[i].src_format); first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); channel = &desc->channel[first_non_void]; v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 34a0f57..03e5011 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -92,20 
+92,21 @@ struct si_state_dsa { struct si_stencil_ref { struct r600_atom atom; struct pipe_stencil_ref state; struct si_dsa_stencil_ref_part dsa_part; }; struct si_vertex_element { unsigned count; + unsigned first_vb_use_mask; /* Two bits per attribute indicating the size of each vector component * in bytes if the size 3-workaround must be applied. */ uint32_t fix_size3; uint64_t fix_fetch; uint32_t rsrc_word3[SI_MAX_ATTRIBS]; uint32_t format_size[SI_MAX_ATTRIBS]; struct pipe_vertex_element elements[SI_MAX_ATTRIBS]; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev