For the series: Reviewed-by: Marek Olšák <marek.ol...@amd.com>
Marek On Wed, Nov 9, 2016 at 4:01 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > I'm also sending out a piglit test, gl-2.0/vertexattribpointer-size-3, > which exposes this corner case. > --- > src/gallium/drivers/radeonsi/si_descriptors.c | 26 ++++++++++++++++++++++++-- > src/gallium/drivers/radeonsi/si_state.c | 9 +++++++++ > src/gallium/drivers/radeonsi/si_state.h | 6 ++++++ > 3 files changed, 39 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c > b/src/gallium/drivers/radeonsi/si_descriptors.c > index 9358542..097ffcd 100644 > --- a/src/gallium/drivers/radeonsi/si_descriptors.c > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c > @@ -957,28 +957,50 @@ bool si_upload_vertex_buffer_descriptors(struct > si_context *sctx) > } > > offset = vb->buffer_offset + ve->src_offset; > va = rbuffer->gpu_address + offset; > > /* Fill in T# buffer resource description */ > desc[0] = va; > desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | > S_008F04_STRIDE(vb->stride); > > - if (sctx->b.chip_class <= CIK && vb->stride) > + if (sctx->b.chip_class <= CIK && vb->stride) { > /* Round up by rounding down and adding 1 */ > desc[2] = (vb->buffer->width0 - offset - > sctx->vertex_elements->format_size[i]) / > vb->stride + 1; > - else > + } else { > + uint32_t size3; > + > desc[2] = vb->buffer->width0 - offset; > > + /* For attributes of size 3 with byte or short > + * components, we use a 4-component data format. > + * > + * As a consequence, we have to round the buffer size > + * up so that the hardware sees four components as > + * being inside the buffer if and only if the first > + * three components are in the buffer. > + * > + * Since the offset and stride are guaranteed to be > + * 4-byte aligned, this alignment will never cross the > + * winsys buffer boundary. > + */ > + size3 = (sctx->vertex_elements->fix_size3 >> (2 * i)) > & 3; > + if (vb->stride && size3) { > + assert(offset % 4 == 0 && vb->stride % 4 == > 0); > + assert(size3 <= 2); > + desc[2] = align(desc[2], size3 * 2); > + } > + } > + > desc[3] = sctx->vertex_elements->rsrc_word3[i]; > > if (!bound[ve->vertex_buffer_index]) { > radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, > (struct > r600_resource*)vb->buffer, > RADEON_USAGE_READ, > RADEON_PRIO_VERTEX_BUFFER); > bound[ve->vertex_buffer_index] = true; > } > } > > diff --git a/src/gallium/drivers/radeonsi/si_state.c > b/src/gallium/drivers/radeonsi/si_state.c > index 3430734..f8dfcf2 100644 > --- a/src/gallium/drivers/radeonsi/si_state.c > +++ b/src/gallium/drivers/radeonsi/si_state.c > @@ -3315,20 +3315,29 @@ static void *si_create_vertex_elements(struct > pipe_context *ctx, > if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { > if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { > v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * > i); > } else if (num_format == > V_008F0C_BUF_NUM_FORMAT_SSCALED) { > v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 > * i); > } else if (num_format == > V_008F0C_BUF_NUM_FORMAT_SINT) { > /* This isn't actually used in OpenGL. */ > v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * > i); > } > } > + > + /* We work around the fact that 8_8_8 and 16_16_16 data > formats > + * do not exist by using the corresponding 4-component > formats. > + * This requires a fixup of the descriptor for bounds checks. > + */ > + if (desc->block.bits == 3 * 8 || > + desc->block.bits == 3 * 16) { > + v->fix_size3 |= (desc->block.bits / 24) << (2 * i); > + } > } > memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * > count); > > return v; > } > > static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) > { > struct si_context *sctx = (struct si_context *)ctx; > struct si_vertex_element *v = (struct si_vertex_element*)state; > diff --git a/src/gallium/drivers/radeonsi/si_state.h > b/src/gallium/drivers/radeonsi/si_state.h > index c444a69..6bfa4e7 100644 > --- a/src/gallium/drivers/radeonsi/si_state.h > +++ b/src/gallium/drivers/radeonsi/si_state.h > @@ -93,20 +93,26 @@ struct si_state_dsa { > struct si_stencil_ref { > struct r600_atom atom; > struct pipe_stencil_ref state; > struct si_dsa_stencil_ref_part dsa_part; > }; > > struct si_vertex_element > { > unsigned count; > uint32_t fix_fetch; > + > + /* Two bits per attribute indicating the size of each vector component > + * in bytes if the size 3-workaround must be applied. > + */ > + uint32_t fix_size3; > + > uint32_t rsrc_word3[SI_MAX_ATTRIBS]; > uint32_t format_size[SI_MAX_ATTRIBS]; > struct pipe_vertex_element elements[SI_MAX_ATTRIBS]; > }; > > union si_state { > struct { > struct si_state_blend *blend; > struct si_state_rasterizer *rasterizer; > struct si_state_dsa *dsa; > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev