Some code that lived in brw_draw_upload.c and was exported for use by the gen8+ code has also been moved to genX_state_upload.c, and the respective symbols are no longer exported.
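For context: genX_state_upload.c is compiled once per hardware generation with GEN_GEN predefined, so the moved helpers become per-generation static copies and no longer need exported symbols. A minimal sketch of that pattern follows (the real token-pasting macros live in genxml/gen_macros.h; the PASTE helpers here are illustrative, not the actual definitions):

   #define PASTE2(a, b) a ## b
   #define PASTE(a, b)  PASTE2(a, b)
   #define genX(name)   PASTE(PASTE(gen, GEN_GEN), PASTE(_, name))

   struct brw_context;

   /* Compiled with -DGEN_GEN=8 this defines gen8_emit_vertices();
    * with -DGEN_GEN=7 it defines gen7_emit_vertices(), and so on.
    * Each copy is static, so nothing is exported from the file.
    */
   static void
   genX(emit_vertices)(struct brw_context *brw)
   {
   #if GEN_GEN >= 8
      /* gen8+ only path, e.g. 3DSTATE_VF_SGVS setup */
   #else
      /* shared/legacy path */
   #endif
   }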
v2: - Remove code from brw_draw_upload too - Emit vertices for gen4-5 too. - Use helper to setup brw_address (Kristian) - Use macros for MOCS values. - Do not use #ifndef NDEBUG on code that is actually used (Ken) Signed-off-by: Rafael Antognolli <rafael.antogno...@intel.com> --- src/mesa/drivers/dri/i965/brw_context.h | 6 +- src/mesa/drivers/dri/i965/brw_draw.h | 2 +- src/mesa/drivers/dri/i965/brw_draw_upload.c | 1123 +---------------- src/mesa/drivers/dri/i965/brw_state.h | 2 +- src/mesa/drivers/dri/i965/gen8_draw_upload.c | 330 +----- src/mesa/drivers/dri/i965/genX_state_upload.c | 1241 +++++++++++++++++- 6 files changed, 1236 insertions(+), 1468 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c7d6e49..8bd8863 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1396,10 +1396,6 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_vs.c */ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); -/* brw_draw_upload.c */ -unsigned brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_vertex_array *glarray); - static inline unsigned brw_get_index_type(unsigned index_size) { @@ -1409,8 +1405,6 @@ brw_get_index_type(unsigned index_size) return (index_size >> 1) << 8; } -void brw_prepare_vertices(struct brw_context *brw); - /* brw_wm_surface_state.c */ void brw_init_surface_formats(struct brw_context *brw); void brw_create_constant_surface(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index 3b99915..7fbe363 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ b/src/mesa/drivers/dri/i965/brw_draw.h @@ -58,8 +58,6 @@ void brw_draw_prims(struct gl_context *ctx, void brw_draw_init( struct brw_context *brw ); void brw_draw_destroy( struct brw_context *brw ); -void brw_prepare_shader_draw_parameters(struct brw_context *); - /* brw_primitive_restart.c */ GLboolean brw_handle_primitive_restart(struct gl_context *ctx, diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 7846293..2fb4d5d 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -37,1129 +37,6 @@ #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -static const GLuint double_types_float[5] = { - 0, - ISL_FORMAT_R64_FLOAT, - ISL_FORMAT_R64G64_FLOAT, - ISL_FORMAT_R64G64B64_FLOAT, - ISL_FORMAT_R64G64B64A64_FLOAT -}; - -static const GLuint double_types_passthru[5] = { - 0, - ISL_FORMAT_R64_PASSTHRU, - ISL_FORMAT_R64G64_PASSTHRU, - ISL_FORMAT_R64G64B64_PASSTHRU, - ISL_FORMAT_R64G64B64A64_PASSTHRU -}; - -static const GLuint float_types[5] = { - 0, - ISL_FORMAT_R32_FLOAT, - ISL_FORMAT_R32G32_FLOAT, - ISL_FORMAT_R32G32B32_FLOAT, - ISL_FORMAT_R32G32B32A32_FLOAT -}; - -static const GLuint half_float_types[5] = { - 0, - ISL_FORMAT_R16_FLOAT, - ISL_FORMAT_R16G16_FLOAT, - ISL_FORMAT_R16G16B16_FLOAT, - ISL_FORMAT_R16G16B16A16_FLOAT -}; - -static const GLuint fixed_point_types[5] = { - 0, - ISL_FORMAT_R32_SFIXED, - ISL_FORMAT_R32G32_SFIXED, - ISL_FORMAT_R32G32B32_SFIXED, - ISL_FORMAT_R32G32B32A32_SFIXED, -}; - -static const GLuint uint_types_direct[5] = { - 0, - ISL_FORMAT_R32_UINT, - ISL_FORMAT_R32G32_UINT, - ISL_FORMAT_R32G32B32_UINT, - ISL_FORMAT_R32G32B32A32_UINT -}; - -static const GLuint uint_types_norm[5] = { - 0, - ISL_FORMAT_R32_UNORM, - ISL_FORMAT_R32G32_UNORM, - ISL_FORMAT_R32G32B32_UNORM, - 
ISL_FORMAT_R32G32B32A32_UNORM -}; - -static const GLuint uint_types_scale[5] = { - 0, - ISL_FORMAT_R32_USCALED, - ISL_FORMAT_R32G32_USCALED, - ISL_FORMAT_R32G32B32_USCALED, - ISL_FORMAT_R32G32B32A32_USCALED -}; - -static const GLuint int_types_direct[5] = { - 0, - ISL_FORMAT_R32_SINT, - ISL_FORMAT_R32G32_SINT, - ISL_FORMAT_R32G32B32_SINT, - ISL_FORMAT_R32G32B32A32_SINT -}; - -static const GLuint int_types_norm[5] = { - 0, - ISL_FORMAT_R32_SNORM, - ISL_FORMAT_R32G32_SNORM, - ISL_FORMAT_R32G32B32_SNORM, - ISL_FORMAT_R32G32B32A32_SNORM -}; - -static const GLuint int_types_scale[5] = { - 0, - ISL_FORMAT_R32_SSCALED, - ISL_FORMAT_R32G32_SSCALED, - ISL_FORMAT_R32G32B32_SSCALED, - ISL_FORMAT_R32G32B32A32_SSCALED -}; - -static const GLuint ushort_types_direct[5] = { - 0, - ISL_FORMAT_R16_UINT, - ISL_FORMAT_R16G16_UINT, - ISL_FORMAT_R16G16B16_UINT, - ISL_FORMAT_R16G16B16A16_UINT -}; - -static const GLuint ushort_types_norm[5] = { - 0, - ISL_FORMAT_R16_UNORM, - ISL_FORMAT_R16G16_UNORM, - ISL_FORMAT_R16G16B16_UNORM, - ISL_FORMAT_R16G16B16A16_UNORM -}; - -static const GLuint ushort_types_scale[5] = { - 0, - ISL_FORMAT_R16_USCALED, - ISL_FORMAT_R16G16_USCALED, - ISL_FORMAT_R16G16B16_USCALED, - ISL_FORMAT_R16G16B16A16_USCALED -}; - -static const GLuint short_types_direct[5] = { - 0, - ISL_FORMAT_R16_SINT, - ISL_FORMAT_R16G16_SINT, - ISL_FORMAT_R16G16B16_SINT, - ISL_FORMAT_R16G16B16A16_SINT -}; - -static const GLuint short_types_norm[5] = { - 0, - ISL_FORMAT_R16_SNORM, - ISL_FORMAT_R16G16_SNORM, - ISL_FORMAT_R16G16B16_SNORM, - ISL_FORMAT_R16G16B16A16_SNORM -}; - -static const GLuint short_types_scale[5] = { - 0, - ISL_FORMAT_R16_SSCALED, - ISL_FORMAT_R16G16_SSCALED, - ISL_FORMAT_R16G16B16_SSCALED, - ISL_FORMAT_R16G16B16A16_SSCALED -}; - -static const GLuint ubyte_types_direct[5] = { - 0, - ISL_FORMAT_R8_UINT, - ISL_FORMAT_R8G8_UINT, - ISL_FORMAT_R8G8B8_UINT, - ISL_FORMAT_R8G8B8A8_UINT -}; - -static const GLuint ubyte_types_norm[5] = { - 0, - ISL_FORMAT_R8_UNORM, - ISL_FORMAT_R8G8_UNORM, - ISL_FORMAT_R8G8B8_UNORM, - ISL_FORMAT_R8G8B8A8_UNORM -}; - -static const GLuint ubyte_types_scale[5] = { - 0, - ISL_FORMAT_R8_USCALED, - ISL_FORMAT_R8G8_USCALED, - ISL_FORMAT_R8G8B8_USCALED, - ISL_FORMAT_R8G8B8A8_USCALED -}; - -static const GLuint byte_types_direct[5] = { - 0, - ISL_FORMAT_R8_SINT, - ISL_FORMAT_R8G8_SINT, - ISL_FORMAT_R8G8B8_SINT, - ISL_FORMAT_R8G8B8A8_SINT -}; - -static const GLuint byte_types_norm[5] = { - 0, - ISL_FORMAT_R8_SNORM, - ISL_FORMAT_R8G8_SNORM, - ISL_FORMAT_R8G8B8_SNORM, - ISL_FORMAT_R8G8B8A8_SNORM -}; - -static const GLuint byte_types_scale[5] = { - 0, - ISL_FORMAT_R8_SSCALED, - ISL_FORMAT_R8G8_SSCALED, - ISL_FORMAT_R8G8B8_SSCALED, - ISL_FORMAT_R8G8B8A8_SSCALED -}; - -static GLuint -double_types(struct brw_context *brw, - int size, - GLboolean doubles) -{ - /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): - * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats, - * 64-bit components are stored in the URB without any conversion." - * Also included on BDW PRM, Volume 7, page 470, table "Source Element - * Formats Supported in VF Unit" - * - * Previous PRMs don't include those references, so for gen7 we can't use - * PASSTHRU formats directly. But in any case, we prefer to return passthru - * even in that case, because that reflects what we want to achieve, even - * if we would need to workaround on gen < 8. - */ - return (doubles - ? 
double_types_passthru[size] - : double_types_float[size]); -} - -static bool -is_passthru_format(uint32_t format) -{ - switch (format) { - case ISL_FORMAT_R64_PASSTHRU: - case ISL_FORMAT_R64G64_PASSTHRU: - case ISL_FORMAT_R64G64B64_PASSTHRU: - case ISL_FORMAT_R64G64B64A64_PASSTHRU: - return true; - default: - return false; - } -} - -static int -uploads_needed(uint32_t format) -{ - if (!is_passthru_format(format)) - return 1; - - switch (format) { - case ISL_FORMAT_R64_PASSTHRU: - case ISL_FORMAT_R64G64_PASSTHRU: - return 1; - case ISL_FORMAT_R64G64B64_PASSTHRU: - case ISL_FORMAT_R64G64B64A64_PASSTHRU: - return 2; - default: - unreachable("not reached"); - } -} - -/* - * Returns the number of componentes associated with a format that is used on - * a 64 to 32 format split. See downsize_format() - */ -static int -upload_format_size(uint32_t upload_format) -{ - switch (upload_format) { - case ISL_FORMAT_R32G32_FLOAT: - return 2; - case ISL_FORMAT_R32G32B32A32_FLOAT: - return 4; - default: - unreachable("not reached"); - } -} - -/* - * Returns the format that we are finally going to use when upload a vertex - * element. It will only change if we are using *64*PASSTHRU formats, as for - * gen < 8 they need to be splitted on two *32*FLOAT formats. - * - * @upload points in which upload we are. Valid values are [0,1] - */ -static uint32_t -downsize_format_if_needed(uint32_t format, - int upload) -{ - assert(upload == 0 || upload == 1); - - if (!is_passthru_format(format)) - return format; - - switch (format) { - case ISL_FORMAT_R64_PASSTHRU: - return ISL_FORMAT_R32G32_FLOAT; - case ISL_FORMAT_R64G64_PASSTHRU: - return ISL_FORMAT_R32G32B32A32_FLOAT; - case ISL_FORMAT_R64G64B64_PASSTHRU: - return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT - : ISL_FORMAT_R32G32_FLOAT; - case ISL_FORMAT_R64G64B64A64_PASSTHRU: - return ISL_FORMAT_R32G32B32A32_FLOAT; - default: - unreachable("not reached"); - } -} - -/** - * Given vertex array type/size/format/normalized info, return - * the appopriate hardware surface type. - * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. 
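To make the pre-gen8 split performed by the helpers above concrete, here is a worked example (not part of the patch) for a dvec3 attribute:

   /* dvec3 -> ISL_FORMAT_R64G64B64_PASSTHRU, 24 bytes per vertex.
    * Gens before 8 lack the *64*_PASSTHRU formats, so the element is
    * emitted as two 32-bit elements covering the same bytes:
    *
    *    uploads_needed(R64G64B64_PASSTHRU)    == 2
    *    downsize_format_if_needed(format, 0)  == R32G32B32A32_FLOAT
    *    downsize_format_if_needed(format, 1)  == R32G32_FLOAT
    *
    * Upload 0 fetches 16 bytes (the raw bits of x and y) at the
    * element's offset; upload 1 fetches the remaining 8 bytes (z) at
    * offset + 16, which is the "c * 16" source-offset stride used
    * when the vertex elements are emitted further down.
    */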
- */ -unsigned -brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_vertex_array *glarray) -{ - int size = glarray->Size; - const bool is_ivybridge_or_older = - brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell; - - if (unlikely(INTEL_DEBUG & DEBUG_VERTS)) - fprintf(stderr, "type %s size %d normalized %d\n", - _mesa_enum_to_string(glarray->Type), - glarray->Size, glarray->Normalized); - - if (glarray->Integer) { - assert(glarray->Format == GL_RGBA); /* sanity check */ - switch (glarray->Type) { - case GL_INT: return int_types_direct[size]; - case GL_SHORT: - if (is_ivybridge_or_older && size == 3) - return short_types_direct[4]; - else - return short_types_direct[size]; - case GL_BYTE: - if (is_ivybridge_or_older && size == 3) - return byte_types_direct[4]; - else - return byte_types_direct[size]; - case GL_UNSIGNED_INT: return uint_types_direct[size]; - case GL_UNSIGNED_SHORT: - if (is_ivybridge_or_older && size == 3) - return ushort_types_direct[4]; - else - return ushort_types_direct[size]; - case GL_UNSIGNED_BYTE: - if (is_ivybridge_or_older && size == 3) - return ubyte_types_direct[4]; - else - return ubyte_types_direct[size]; - default: unreachable("not reached"); - } - } else if (glarray->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { - return ISL_FORMAT_R11G11B10_FLOAT; - } else if (glarray->Normalized) { - switch (glarray->Type) { - case GL_DOUBLE: return double_types(brw, size, glarray->Doubles); - case GL_FLOAT: return float_types[size]; - case GL_HALF_FLOAT: - case GL_HALF_FLOAT_OES: - if (brw->gen < 6 && size == 3) - return half_float_types[4]; - else - return half_float_types[size]; - case GL_INT: return int_types_norm[size]; - case GL_SHORT: return short_types_norm[size]; - case GL_BYTE: return byte_types_norm[size]; - case GL_UNSIGNED_INT: return uint_types_norm[size]; - case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; - case GL_UNSIGNED_BYTE: - if (glarray->Format == GL_BGRA) { - /* See GL_EXT_vertex_array_bgra */ - assert(size == 4); - return ISL_FORMAT_B8G8R8A8_UNORM; - } - else { - return ubyte_types_norm[size]; - } - case GL_FIXED: - if (brw->gen >= 8 || brw->is_haswell) - return fixed_point_types[size]; - - /* This produces GL_FIXED inputs as values between INT32_MIN and - * INT32_MAX, which will be scaled down by 1/65536 by the VS. - */ - return int_types_scale[size]; - /* See GL_ARB_vertex_type_2_10_10_10_rev. - * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd - * like to use here, so upload everything as UINT and fix - * it in the shader - */ - case GL_INT_2_10_10_10_REV: - assert(size == 4); - if (brw->gen >= 8 || brw->is_haswell) { - return glarray->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_SNORM - : ISL_FORMAT_R10G10B10A2_SNORM; - } - return ISL_FORMAT_R10G10B10A2_UINT; - case GL_UNSIGNED_INT_2_10_10_10_REV: - assert(size == 4); - if (brw->gen >= 8 || brw->is_haswell) { - return glarray->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_UNORM - : ISL_FORMAT_R10G10B10A2_UNORM; - } - return ISL_FORMAT_R10G10B10A2_UINT; - default: unreachable("not reached"); - } - } - else { - /* See GL_ARB_vertex_type_2_10_10_10_rev. - * W/A: the hardware doesn't really support the formats we'd - * like to use here, so upload everything as UINT and fix - * it in the shader - */ - if (glarray->Type == GL_INT_2_10_10_10_REV) { - assert(size == 4); - if (brw->gen >= 8 || brw->is_haswell) { - return glarray->Format == GL_BGRA - ? 
ISL_FORMAT_B10G10R10A2_SSCALED - : ISL_FORMAT_R10G10B10A2_SSCALED; - } - return ISL_FORMAT_R10G10B10A2_UINT; - } else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { - assert(size == 4); - if (brw->gen >= 8 || brw->is_haswell) { - return glarray->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_USCALED - : ISL_FORMAT_R10G10B10A2_USCALED; - } - return ISL_FORMAT_R10G10B10A2_UINT; - } - assert(glarray->Format == GL_RGBA); /* sanity check */ - switch (glarray->Type) { - case GL_DOUBLE: return double_types(brw, size, glarray->Doubles); - case GL_FLOAT: return float_types[size]; - case GL_HALF_FLOAT: - case GL_HALF_FLOAT_OES: - if (brw->gen < 6 && size == 3) - return half_float_types[4]; - else - return half_float_types[size]; - case GL_INT: return int_types_scale[size]; - case GL_SHORT: return short_types_scale[size]; - case GL_BYTE: return byte_types_scale[size]; - case GL_UNSIGNED_INT: return uint_types_scale[size]; - case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; - case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; - case GL_FIXED: - if (brw->gen >= 8 || brw->is_haswell) - return fixed_point_types[size]; - - /* This produces GL_FIXED inputs as values between INT32_MIN and - * INT32_MAX, which will be scaled down by 1/65536 by the VS. - */ - return int_types_scale[size]; - default: unreachable("not reached"); - } - } -} - -static void -copy_array_to_vbo_array(struct brw_context *brw, - struct brw_vertex_element *element, - int min, int max, - struct brw_vertex_buffer *buffer, - GLuint dst_stride) -{ - const int src_stride = element->glarray->StrideB; - - /* If the source stride is zero, we just want to upload the current - * attribute once and set the buffer's stride to 0. There's no need - * to replicate it out. - */ - if (src_stride == 0) { - intel_upload_data(brw, element->glarray->Ptr, - element->glarray->_ElementSize, - element->glarray->_ElementSize, - &buffer->bo, &buffer->offset); - - buffer->stride = 0; - buffer->size = element->glarray->_ElementSize; - return; - } - - const unsigned char *src = element->glarray->Ptr + min * src_stride; - int count = max - min + 1; - GLuint size = count * dst_stride; - uint8_t *dst = intel_upload_space(brw, size, dst_stride, - &buffer->bo, &buffer->offset); - - /* The GL 4.5 spec says: - * "If any enabled array’s buffer binding is zero when DrawArrays or - * one of the other drawing commands defined in section 10.4 is called, - * the result is undefined." - * - * In this case, let's the dst with undefined values - */ - if (src != NULL) { - if (dst_stride == src_stride) { - memcpy(dst, src, size); - } else { - while (count--) { - memcpy(dst, src, dst_stride); - src += src_stride; - dst += dst_stride; - } - } - } - buffer->stride = dst_stride; - buffer->size = size; -} - -void -brw_prepare_vertices(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - GLbitfield64 vs_inputs = vs_prog_data->inputs_read; - const unsigned char *ptr = NULL; - GLuint interleaved = 0; - unsigned int min_index = brw->vb.min_index + brw->basevertex; - unsigned int max_index = brw->vb.max_index + brw->basevertex; - unsigned i; - int delta, j; - - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; - GLuint nr_uploads = 0; - - /* _NEW_POLYGON - * - * On gen6+, edge flags don't end up in the VUE (either in or out of the - * VS). 
Instead, they're uploaded as the last vertex element, and the data - * is passed sideband through the fixed function units. So, we need to - * prepare the vertex buffer for it, but it's not present in inputs_read. - */ - if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL)) { - vs_inputs |= VERT_BIT_EDGEFLAG; - } - - if (0) - fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index); - - /* Accumulate the list of enabled arrays. */ - brw->vb.nr_enabled = 0; - while (vs_inputs) { - GLuint first = ffsll(vs_inputs) - 1; - assert (first < 64); - GLuint index = - first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read & - BITFIELD64_MASK(first)), 2); - struct brw_vertex_element *input = &brw->vb.inputs[index]; - input->is_dual_slot = (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0; - vs_inputs &= ~BITFIELD64_BIT(first); - if (input->is_dual_slot) - vs_inputs &= ~BITFIELD64_BIT(first + 1); - brw->vb.enabled[brw->vb.nr_enabled++] = input; - } - - if (brw->vb.nr_enabled == 0) - return; - - if (brw->vb.nr_buffers) - return; - - /* The range of data in a given buffer represented as [min, max) */ - struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX]; - uint32_t buffer_range_start[VERT_ATTRIB_MAX]; - uint32_t buffer_range_end[VERT_ATTRIB_MAX]; - - for (i = j = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - const struct gl_vertex_array *glarray = input->glarray; - - if (_mesa_is_bufferobj(glarray->BufferObj)) { - struct intel_buffer_object *intel_buffer = - intel_buffer_object(glarray->BufferObj); - - const uint32_t offset = (uintptr_t)glarray->Ptr; - - /* Start with the worst case */ - uint32_t start = 0; - uint32_t range = intel_buffer->Base.Size; - if (glarray->InstanceDivisor) { - if (brw->num_instances) { - start = offset + glarray->StrideB * brw->baseinstance; - range = (glarray->StrideB * ((brw->num_instances - 1) / - glarray->InstanceDivisor) + - glarray->_ElementSize); - } - } else { - if (brw->vb.index_bounds_valid) { - start = offset + min_index * glarray->StrideB; - range = (glarray->StrideB * (max_index - min_index) + - glarray->_ElementSize); - } - } - - /* If we have a VB set to be uploaded for this buffer object - * already, reuse that VB state so that we emit fewer - * relocations. - */ - unsigned k; - for (k = 0; k < i; k++) { - const struct gl_vertex_array *other = brw->vb.enabled[k]->glarray; - if (glarray->BufferObj == other->BufferObj && - glarray->StrideB == other->StrideB && - glarray->InstanceDivisor == other->InstanceDivisor && - (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB) - { - input->buffer = brw->vb.enabled[k]->buffer; - input->offset = glarray->Ptr - other->Ptr; - - buffer_range_start[input->buffer] = - MIN2(buffer_range_start[input->buffer], start); - buffer_range_end[input->buffer] = - MAX2(buffer_range_end[input->buffer], start + range); - break; - } - } - if (k == i) { - struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - - /* Named buffer object: Just reference its contents directly. 
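As an illustration of the reuse test above (hypothetical GL usage, values chosen for the example): two attributes interleaved in the same VBO satisfy the same-buffer / same-stride / same-divisor / delta-within-stride checks, so the second one shares the first one's vertex buffer slot:

   /* glBindBuffer(GL_ARRAY_BUFFER, vbo);
    * glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 24, (void *) 0);
    * glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 24, (void *) 12);
    *
    * For attribute 1: ptr delta = 12 < StrideB = 24, so
    *    input->buffer = attribute 0's buffer index
    *    input->offset = 12
    * and only one VERTEX_BUFFER_STATE (one relocation) is emitted
    * for the pair.
    */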
*/ - buffer->offset = offset; - buffer->stride = glarray->StrideB; - buffer->step_rate = glarray->InstanceDivisor; - buffer->size = glarray->BufferObj->Size - offset; - - enabled_buffer[j] = intel_buffer; - buffer_range_start[j] = start; - buffer_range_end[j] = start + range; - - input->buffer = j++; - input->offset = 0; - } - } else { - /* Queue the buffer object up to be uploaded in the next pass, - * when we've decided if we're doing interleaved or not. - */ - if (nr_uploads == 0) { - interleaved = glarray->StrideB; - ptr = glarray->Ptr; - } - else if (interleaved != glarray->StrideB || - glarray->Ptr < ptr || - (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved) - { - /* If our stride is different from the first attribute's stride, - * or if the first attribute's stride didn't cover our element, - * disable the interleaved upload optimization. The second case - * can most commonly occur in cases where there is a single vertex - * and, for example, the data is stored on the application's - * stack. - * - * NOTE: This will also disable the optimization in cases where - * the data is in a different order than the array indices. - * Something like: - * - * float data[...]; - * glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]); - * glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]); - */ - interleaved = 0; - } - - upload[nr_uploads++] = input; - } - } - - /* Now that we've set up all of the buffers, we walk through and reference - * each of them. We do this late so that we get the right size in each - * buffer and don't reference too little data. - */ - for (i = 0; i < j; i++) { - struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - if (buffer->bo) - continue; - - const uint32_t start = buffer_range_start[i]; - const uint32_t range = buffer_range_end[i] - buffer_range_start[i]; - - buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range); - brw_bo_reference(buffer->bo); - } - - /* If we need to upload all the arrays, then we can trim those arrays to - * only the used elements [min_index, max_index] so long as we adjust all - * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias. - */ - brw->vb.start_vertex_bias = 0; - delta = min_index; - if (nr_uploads == brw->vb.nr_enabled) { - brw->vb.start_vertex_bias = -delta; - delta = 0; - } - - /* Handle any arrays to be uploaded. */ - if (nr_uploads > 1) { - if (interleaved) { - struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - /* All uploads are interleaved, so upload the arrays together as - * interleaved. First, upload the contents and set up upload[0]. - */ - copy_array_to_vbo_array(brw, upload[0], min_index, max_index, - buffer, interleaved); - buffer->offset -= delta * interleaved; - buffer->size += delta * interleaved; - - for (i = 0; i < nr_uploads; i++) { - /* Then, just point upload[i] at upload[0]'s buffer. */ - upload[i]->offset = - ((const unsigned char *)upload[i]->glarray->Ptr - ptr); - upload[i]->buffer = j; - } - j++; - - nr_uploads = 0; - } - } - /* Upload non-interleaved arrays */ - for (i = 0; i < nr_uploads; i++) { - struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - if (upload[i]->glarray->InstanceDivisor == 0) { - copy_array_to_vbo_array(brw, upload[i], min_index, max_index, - buffer, upload[i]->glarray->_ElementSize); - } else { - /* This is an instanced attribute, since its InstanceDivisor - * is not zero. Therefore, its data will be stepped after the - * instanced draw has been run InstanceDivisor times. 
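For example, with illustrative numbers brw->num_instances = 10 and InstanceDivisor = 4, the computation just below yields

   instanced_attr_max_index = (10 - 1) / 4 = 2;

so only elements 0..2 are uploaded: instances 0-3 read element 0, instances 4-7 read element 1, and instances 8-9 read element 2.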
- */ - uint32_t instanced_attr_max_index = - (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor; - copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index, - buffer, upload[i]->glarray->_ElementSize); - } - buffer->offset -= delta * buffer->stride; - buffer->size += delta * buffer->stride; - buffer->step_rate = upload[i]->glarray->InstanceDivisor; - upload[i]->buffer = j++; - upload[i]->offset = 0; - } - - brw->vb.nr_buffers = j; -} - -void -brw_prepare_shader_draw_parameters(struct brw_context *brw) -{ - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - - /* For non-indirect draws, upload gl_BaseVertex. */ - if ((vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) && - brw->draw.draw_params_bo == NULL) { - intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4, - &brw->draw.draw_params_bo, - &brw->draw.draw_params_offset); - } - - if (vs_prog_data->uses_drawid) { - intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4, - &brw->draw.draw_id_bo, - &brw->draw.draw_id_offset); - } -} - -/** - * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS). - */ -uint32_t * -brw_emit_vertex_buffer_state(struct brw_context *brw, - unsigned buffer_nr, - struct brw_bo *bo, - unsigned start_offset, - unsigned end_offset, - unsigned stride, - unsigned step_rate, - uint32_t *__map) -{ - struct gl_context *ctx = &brw->ctx; - uint32_t dw0; - - if (brw->gen >= 8) { - dw0 = buffer_nr << GEN6_VB0_INDEX_SHIFT; - } else if (brw->gen >= 6) { - dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) | - (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA - : GEN6_VB0_ACCESS_VERTEXDATA); - } else { - dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) | - (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA - : BRW_VB0_ACCESS_VERTEXDATA); - } - - if (brw->gen >= 7) - dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; - - switch (brw->gen) { - case 7: - dw0 |= GEN7_MOCS_L3 << 16; - break; - case 8: - dw0 |= BDW_MOCS_WB << 16; - break; - case 9: - dw0 |= SKL_MOCS_WB << 16; - break; - } - - WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047), - "VBO stride %d too large, bad rendering may occur\n", - stride); - OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT)); - if (brw->gen >= 8) { - OUT_RELOC64(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset); - /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry - - * Vertex Fetch (VF) Stage - State - * - * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x - * VBState.BufferPitch", the address of the byte immediately beyond the - * last valid byte of the buffer is determined by - * "VBState.StartingBufferAddress + VBState.BufferSize". - */ - OUT_BATCH(end_offset - start_offset); - } else if (brw->gen >= 5) { - OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset); - /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry - - * Vertex Fetch (VF) Stage - State - * - * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x - * VBState.BufferPitch", the address of the byte immediately beyond the - * last valid byte of the buffer is determined by - * "VBState.EndAddress + 1". 
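Reading the two end-address conventions side by side (illustrative values, start_offset = 0 and 64 valid bytes):

   /* gen8+ : BufferSize = end_offset - start_offset = 64
    * gen5-7: EndAddress = end_offset - 1             = 63
    *
    * Both describe the same 64 valid bytes: gen8 programs a size,
    * while the older gens program the address of the last valid
    * byte, hence the "end_offset - 1" relocation just below.
    */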
- */ - OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, end_offset - 1); - OUT_BATCH(step_rate); - } else { - OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset); - OUT_BATCH(0); - OUT_BATCH(step_rate); - } - - return __map; -} - -static void -brw_emit_vertices(struct brw_context *brw) -{ - GLuint i; - - brw_prepare_vertices(brw); - brw_prepare_shader_draw_parameters(brw); - - brw_emit_query_begin(brw); - - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - - unsigned nr_elements = brw->vb.nr_enabled; - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || - vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) - ++nr_elements; - if (vs_prog_data->uses_drawid) - nr_elements++; - - /* If any of the formats of vb.enabled needs more that one upload, we need - * to add it to nr_elements */ - unsigned extra_uploads = 0; - for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); - - if (uploads_needed(format) > 1) - extra_uploads++; - } - nr_elements += extra_uploads; - - /* If the VS doesn't read any inputs (calculating vertex position from - * a state variable for some reason, for example), emit a single pad - * VERTEX_ELEMENT struct and bail. - * - * The stale VB state stays in place, but they don't do anything unless - * a VE loads from them. - */ - if (nr_elements == 0) { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); - if (brw->gen >= 6) { - OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - } else { - OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - } - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); - ADVANCE_BATCH(); - return; - } - - /* Now emit VB and VEP state packets. - */ - - const bool uses_draw_params = - vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance; - const unsigned nr_buffers = brw->vb.nr_buffers + - uses_draw_params + vs_prog_data->uses_drawid; - - if (nr_buffers) { - if (brw->gen >= 6) { - assert(nr_buffers <= 33); - } else { - assert(nr_buffers <= 17); - } - - BEGIN_BATCH(1 + 4 * nr_buffers); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); - for (i = 0; i < brw->vb.nr_buffers; i++) { - struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - /* Prior to Haswell and Bay Trail we have to use 4-component formats - * to fake 3-component ones. In particular, we do this for - * half-float and 8 and 16-bit integer formats. This means that the - * vertex element may poke over the end of the buffer by 2 bytes. 
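Concretely, for one of the faked 3-component cases mentioned above (illustrative attribute): a 3-component GL_UNSIGNED_SHORT integer attribute is 6 bytes of real data per vertex but is fetched as ushort_types_direct[4]:

   /* actual data : 3 * 2 = 6 bytes per vertex
    * fetched as  : R16G16B16A16_UINT -> 8 bytes
    * overread    : 8 - 6 = 2 bytes past the last element,
    *               covered by the 2-byte "padding" below
    */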
- */ - unsigned padding = - (brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2; - EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->offset, - buffer->offset + buffer->size + padding, - buffer->stride, buffer->step_rate); - - } - - if (uses_draw_params) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, - brw->draw.draw_params_bo, - brw->draw.draw_params_offset, - brw->draw.draw_params_bo->size, - 0, /* stride */ - 0); /* step rate */ - } - - if (vs_prog_data->uses_drawid) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, - brw->draw.draw_id_bo, - brw->draw.draw_id_offset, - brw->draw.draw_id_bo->size, - 0, /* stride */ - 0); /* step rate */ - } - - ADVANCE_BATCH(); - } - - /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably - * for VertexID/InstanceID. - */ - if (brw->gen >= 6) { - assert(nr_elements <= 34); - } else { - assert(nr_elements <= 18); - } - - struct brw_vertex_element *gen6_edgeflag_input = NULL; - - BEGIN_BATCH(1 + nr_elements * 2); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; - unsigned num_uploads = 1; - unsigned c; - - num_uploads = uploads_needed(format); - - if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { - /* Gen6+ passes edgeflag as sideband along with the vertex, instead - * of in the VUE. We have to upload it sideband as the last vertex - * element according to the B-Spec. - */ - if (brw->gen >= 6) { - gen6_edgeflag_input = input; - continue; - } - } - - for (c = 0; c < num_uploads; c++) { - uint32_t upload_format = downsize_format_if_needed(format, c); - /* If we need more that one upload, the offset stride would be 128 - * bits (16 bytes), as for previous uploads we are using the full - * entry. */ - unsigned int offset = input->offset + c * 16; - int size = input->glarray->Size; - - if (is_passthru_format(format)) - size = upload_format_size(upload_format); - - switch (size) { - case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; - case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; - case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; - case 3: comp3 = input->glarray->Integer - ? 
BRW_VE1_COMPONENT_STORE_1_INT - : BRW_VE1_COMPONENT_STORE_1_FLT; - break; - } - - if (brw->gen >= 6) { - OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (upload_format << BRW_VE0_FORMAT_SHIFT) | - (offset << BRW_VE0_SRC_OFFSET_SHIFT)); - } else { - OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (upload_format << BRW_VE0_FORMAT_SHIFT) | - (offset << BRW_VE0_SRC_OFFSET_SHIFT)); - } - - if (brw->gen >= 5) - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); - else - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | - ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); - } - } - - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || - vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) { - uint32_t dw0 = 0, dw1 = 0; - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0; - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0; - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0; - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0; - - if (vs_prog_data->uses_basevertex) - comp0 = BRW_VE1_COMPONENT_STORE_SRC; - - if (vs_prog_data->uses_baseinstance) - comp1 = BRW_VE1_COMPONENT_STORE_SRC; - - if (vs_prog_data->uses_vertexid) - comp2 = BRW_VE1_COMPONENT_STORE_VID; - - if (vs_prog_data->uses_instanceid) - comp3 = BRW_VE1_COMPONENT_STORE_IID; - - dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT); - - if (brw->gen >= 6) { - dw0 |= GEN6_VE0_VALID | - brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT; - } else { - dw0 |= BRW_VE0_VALID | - brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT | - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT; - dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; - } - - /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values, - * the format is ignored and the value is always int. 
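Summarizing the system-generated-value element built above (taken directly from the comp0..comp3 assignments in this hunk):

   /* component 0: gl_BaseVertex    STORE_SRC, read from draw_params_bo
    * component 1: gl_BaseInstance  STORE_SRC, read from draw_params_bo
    * component 2: gl_VertexID      STORE_VID, generated by the VF unit
    * component 3: gl_InstanceID    STORE_IID, generated by the VF unit
    * Components whose value the VS does not use are left as STORE_0.
    */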
- */ - - OUT_BATCH(dw0); - OUT_BATCH(dw1); - } - - if (vs_prog_data->uses_drawid) { - uint32_t dw0 = 0, dw1 = 0; - - dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); - - if (brw->gen >= 6) { - dw0 |= GEN6_VE0_VALID | - ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); - } else { - dw0 |= BRW_VE0_VALID | - ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) | - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); - - dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; - } - - OUT_BATCH(dw0); - OUT_BATCH(dw1); - } - - if (brw->gen >= 6 && gen6_edgeflag_input) { - uint32_t format = - brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); - - OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - GEN6_VE0_EDGE_FLAG_ENABLE | - (format << BRW_VE0_FORMAT_SHIFT) | - (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - - ADVANCE_BATCH(); -} - -const struct brw_tracked_state brw_vertices = { - .dirty = { - .mesa = _NEW_POLYGON, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTICES | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_emit_vertices, -}; - static void brw_upload_indices(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 084f97f..acb7334 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -103,7 +103,6 @@ extern const struct brw_tracked_state brw_psp_urb_cbs; extern const struct brw_tracked_state brw_drawing_rect; extern const struct brw_tracked_state brw_indices; -extern const struct brw_tracked_state brw_vertices; extern const struct brw_tracked_state brw_index_buffer; extern const struct brw_tracked_state brw_cs_state; extern const struct brw_tracked_state gen7_cs_push_constants; @@ -125,7 +124,6 @@ extern const struct brw_tracked_state haswell_cut_index; extern const struct brw_tracked_state gen8_index_buffer; extern const struct brw_tracked_state gen8_multisample_state; extern const struct brw_tracked_state gen8_pma_fix; -extern const struct brw_tracked_state gen8_vertices; extern const struct brw_tracked_state gen8_vf_topology; extern const struct brw_tracked_state brw_cs_work_groups_surface; diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c index e81cca9..8db160b 100644 --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c @@ -34,336 +34,6 @@ #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -#ifndef NDEBUG -static bool -is_passthru_format(uint32_t format) -{ - switch (format) { - case ISL_FORMAT_R64_PASSTHRU: - case ISL_FORMAT_R64G64_PASSTHRU: - case ISL_FORMAT_R64G64B64_PASSTHRU: - case ISL_FORMAT_R64G64B64A64_PASSTHRU: - return true; - default: - return false; - } -} -#endif - -static void -gen8_emit_vertices(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - bool uses_edge_flag; - - brw_prepare_vertices(brw); - brw_prepare_shader_draw_parameters(brw); - - uses_edge_flag = 
(ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); - - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { - unsigned vue = brw->vb.nr_enabled; - - /* The element for the edge flags must always be last, so we have to - * insert the SGVS before it in that case. - */ - if (uses_edge_flag) { - assert(vue > 0); - vue--; - } - - WARN_ONCE(vue >= 33, - "Trying to insert VID/IID past 33rd vertex element, " - "need to reorder the vertex attrbutes."); - - unsigned dw1 = 0; - if (vs_prog_data->uses_vertexid) { - dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID | - (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) | /* .z channel */ - (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT); - } - - if (vs_prog_data->uses_instanceid) { - dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID | - (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */ - (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT); - } - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); - OUT_BATCH(dw1); - ADVANCE_BATCH(); - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* Normally we don't need an element for the SGVS attribute because the - * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an - * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if - * we're using draw parameters then we need an element for the those - * values. Additionally if there is an edge flag element then the SGVS - * can't be inserted past that so we need a dummy element to ensure that - * the edge flag is the last one. - */ - const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance || - ((vs_prog_data->uses_instanceid || - vs_prog_data->uses_vertexid) && - uses_edge_flag)); - const unsigned nr_elements = - brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; - - /* If the VS doesn't read any inputs (calculating vertex position from - * a state variable for some reason, for example), emit a single pad - * VERTEX_ELEMENT struct and bail. - * - * The stale VB state stays in place, but they don't do anything unless - * a VE loads from them. - */ - if (nr_elements == 0) { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (3 - 2)); - OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); - ADVANCE_BATCH(); - return; - } - - /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. 
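For reference, the buffer-slot layout the code just below produces, assuming (for illustration) three user vertex buffers with both draw parameters and gl_DrawID in use:

   /* slots 0..2: user vertex buffers (brw->vb.buffers[0..2])
    * slot  3   : gl_BaseVertex / gl_BaseInstance (draw_params_bo)
    * slot  4   : gl_DrawID (draw_id_bo)
    *
    * nr_buffers = brw->vb.nr_buffers (3) + uses_draw_params (1)
    *              + vs_prog_data->uses_drawid (1) = 5
    */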
*/ - const bool uses_draw_params = - vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance; - const unsigned nr_buffers = brw->vb.nr_buffers + - uses_draw_params + vs_prog_data->uses_drawid; - - if (nr_buffers) { - assert(nr_buffers <= 33); - - BEGIN_BATCH(1 + 4 * nr_buffers); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); - for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { - const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, - buffer->offset, - buffer->offset + buffer->size, - buffer->stride, 0 /* unused */); - } - - if (uses_draw_params) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, - brw->draw.draw_params_bo, - brw->draw.draw_params_offset, - brw->draw.draw_params_bo->size, - 0 /* stride */, - 0 /* unused */); - } - - if (vs_prog_data->uses_drawid) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, - brw->draw.draw_id_bo, - brw->draw.draw_id_offset, - brw->draw.draw_id_bo->size, - 0 /* stride */, - 0 /* unused */); - } - ADVANCE_BATCH(); - } - - /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, - * presumably for VertexID/InstanceID. - */ - assert(nr_elements <= 34); - - struct brw_vertex_element *gen6_edgeflag_input = NULL; - - BEGIN_BATCH(1 + nr_elements * 2); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); - for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; - - /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): - * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an - * element which has edge flag enabled." - */ - assert(!(is_passthru_format(format) && uses_edge_flag)); - - /* The gen4 driver expects edgeflag to come in as a float, and passes - * that float on to the tests in the clipper. Mesa's current vertex - * attribute value for EdgeFlag is stored as a float, which works out. - * glEdgeFlagPointer, on the other hand, gives us an unnormalized - * integer ubyte. Just rewrite that to convert to a float. - */ - if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { - /* Gen6+ passes edgeflag as sideband along with the vertex, instead - * of in the VUE. We have to upload it sideband as the last vertex - * element according to the B-Spec. - */ - gen6_edgeflag_input = input; - continue; - } - - switch (input->glarray->Size) { - case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; - case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; - case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; - case 3: - if (input->glarray->Doubles) { - comp3 = BRW_VE1_COMPONENT_STORE_0; - } else if (input->glarray->Integer) { - comp3 = BRW_VE1_COMPONENT_STORE_1_INT; - } else { - comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; - } - - break; - } - - /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): - * - * "When SourceElementFormat is set to one of the *64*_PASSTHRU - * formats, 64-bit components are stored in the URB without any - * conversion. In this case, vertex elements must be written as 128 - * or 256 bits, with VFCOMP_STORE_0 being used to pad the output - * as required. 
E.g., if R64_PASSTHRU is used to copy a 64-bit Red - * component into the URB, Component 1 must be specified as - * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) - * in order to output a 128-bit vertex element, or Components 1-3 must - * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex - * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3 - * to be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex - * element." - */ - if (input->glarray->Doubles && !input->is_dual_slot) { - /* Store vertex elements which correspond to double and dvec2 vertex - * shader inputs as 128-bit vertex elements, instead of 256-bits. - */ - comp2 = BRW_VE1_COMPONENT_NOSTORE; - comp3 = BRW_VE1_COMPONENT_NOSTORE; - } - - OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (format << BRW_VE0_FORMAT_SHIFT) | - (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); - - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); - } - - if (needs_sgvs_element) { - if (vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance) { - OUT_BATCH(GEN6_VE0_VALID | - brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } else { - OUT_BATCH(GEN6_VE0_VALID); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - } - - if (vs_prog_data->uses_drawid) { - OUT_BATCH(GEN6_VE0_VALID | - ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - - if (gen6_edgeflag_input) { - uint32_t format = - brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); - - OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - GEN6_VE0_EDGE_FLAG_ENABLE | - (format << BRW_VE0_FORMAT_SHIFT) | - (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - ADVANCE_BATCH(); - - for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { - const struct brw_vertex_element *input = brw->vb.enabled[i]; - const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; - unsigned element_index; - - /* The edge flag element is reordered to be the last one in the code - * above so we need to compensate for that in the element indices used - * below. - */ - if (input == gen6_edgeflag_input) - element_index = nr_elements - 1; - else - element_index = j++; - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(element_index | - (buffer->step_rate ? 
GEN8_VF_INSTANCING_ENABLE : 0)); - OUT_BATCH(buffer->step_rate); - ADVANCE_BATCH(); - } - - if (vs_prog_data->uses_drawid) { - const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(element); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - -const struct brw_tracked_state gen8_vertices = { - .dirty = { - .mesa = _NEW_POLYGON, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTICES | - BRW_NEW_VS_PROG_DATA, - }, - .emit = gen8_emit_vertices, -}; - static void gen8_emit_index_buffer(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 08a6228..b7475dc 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -26,10 +26,16 @@ #include "common/gen_device_info.h" #include "genxml/gen_macros.h" +#include "main/bufferobj.h" +#include "main/context.h" +#include "main/enums.h" +#include "main/macros.h" + #include "brw_context.h" #if GEN_GEN == 6 #include "brw_defines.h" #endif +#include "brw_draw.h" #include "brw_state.h" #include "brw_wm.h" #include "brw_util.h" @@ -125,6 +131,17 @@ instruction_bo(struct brw_bo *bo, uint32_t offset) }; } +static inline struct brw_address +vertex_bo(struct brw_bo *bo, uint32_t offset) +{ + return (struct brw_address) { + .bo = bo, + .offset = offset, + .read_domains = I915_GEM_DOMAIN_VERTEX, + .write_domain = 0, + }; +} + #include "genxml/genX_pack.h" #define _brw_cmd_length(cmd) cmd ## _length @@ -160,6 +177,1221 @@ instruction_bo(struct brw_bo *bo, uint32_t offset) #define SKL_MOCS_WB (2 << 1) #define BDW_MOCS_WB 0x78 +#define GEN7_MOCS_L3 1 + +static uint32_t * +genX(emit_vertex_buffer_state)(struct brw_context *brw, + uint32_t *dw, + unsigned buffer_nr, + struct brw_bo *bo, + unsigned start_offset, + unsigned end_offset, + unsigned stride, + unsigned step_rate) +{ + struct GENX(VERTEX_BUFFER_STATE) buf_state = { + .VertexBufferIndex = buffer_nr, + .BufferPitch = stride, + .BufferStartingAddress = vertex_bo(bo, start_offset), +#if GEN_GEN >= 8 + .BufferSize = end_offset - start_offset, +#endif + +#if GEN_GEN >= 7 + .AddressModifyEnable = true, +#endif + +#if GEN_GEN < 8 + .BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA, + .InstanceDataStepRate = step_rate, +#if GEN_GEN >= 5 + .EndAddress = vertex_bo(bo, end_offset - 1), +#endif +#endif + +#if GEN_GEN == 9 + .VertexBufferMOCS = SKL_MOCS_WB, +#elif GEN_GEN == 8 + .VertexBufferMOCS = BDW_MOCS_WB, +#elif GEN_GEN == 7 + .VertexBufferMOCS = GEN7_MOCS_L3, +#endif + }; + + GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &buf_state); + return dw + GENX(VERTEX_BUFFER_STATE_length); +} + +UNUSED static bool +is_passthru_format(uint32_t format) +{ + switch (format) { + case ISL_FORMAT_R64_PASSTHRU: + case ISL_FORMAT_R64G64_PASSTHRU: + case ISL_FORMAT_R64G64B64_PASSTHRU: + case ISL_FORMAT_R64G64B64A64_PASSTHRU: + return true; + default: + return false; + } +} + +#if GEN_GEN < 8 +static int +genX(uploads_needed)(uint32_t format) +{ + if (!is_passthru_format(format)) + return 1; + + switch (format) { + case ISL_FORMAT_R64_PASSTHRU: + case ISL_FORMAT_R64G64_PASSTHRU: + return 1; + case ISL_FORMAT_R64G64B64_PASSTHRU: + case ISL_FORMAT_R64G64B64A64_PASSTHRU: + return 2; + default: + unreachable("not reached"); + } +} + +/* + * Returns the format that we are finally going to use when upload a vertex + * element. 
It will only change if we are using *64*PASSTHRU formats, as for + * gen < 8 they need to be splitted on two *32*FLOAT formats. + * + * @upload points in which upload we are. Valid values are [0,1] + */ +static uint32_t +downsize_format_if_needed(uint32_t format, + int upload) +{ + assert(upload == 0 || upload == 1); + + if (!is_passthru_format(format)) + return format; + + switch (format) { + case ISL_FORMAT_R64_PASSTHRU: + return ISL_FORMAT_R32G32_FLOAT; + case ISL_FORMAT_R64G64_PASSTHRU: + return ISL_FORMAT_R32G32B32A32_FLOAT; + case ISL_FORMAT_R64G64B64_PASSTHRU: + return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT + : ISL_FORMAT_R32G32_FLOAT; + case ISL_FORMAT_R64G64B64A64_PASSTHRU: + return ISL_FORMAT_R32G32B32A32_FLOAT; + default: + unreachable("not reached"); + } +} + +/* + * Returns the number of componentes associated with a format that is used on + * a 64 to 32 format split. See downsize_format() + */ +static int +upload_format_size(uint32_t upload_format) +{ + switch (upload_format) { + case ISL_FORMAT_R32G32_FLOAT: + return 2; + case ISL_FORMAT_R32G32B32A32_FLOAT: + return 4; + default: + unreachable("not reached"); + } +} +#endif + +static void +copy_array_to_vbo_array(struct brw_context *brw, + struct brw_vertex_element *element, + int min, int max, + struct brw_vertex_buffer *buffer, + GLuint dst_stride) +{ + const int src_stride = element->glarray->StrideB; + + /* If the source stride is zero, we just want to upload the current + * attribute once and set the buffer's stride to 0. There's no need + * to replicate it out. + */ + if (src_stride == 0) { + intel_upload_data(brw, element->glarray->Ptr, + element->glarray->_ElementSize, + element->glarray->_ElementSize, + &buffer->bo, &buffer->offset); + + buffer->stride = 0; + buffer->size = element->glarray->_ElementSize; + return; + } + + const unsigned char *src = element->glarray->Ptr + min * src_stride; + int count = max - min + 1; + GLuint size = count * dst_stride; + uint8_t *dst = intel_upload_space(brw, size, dst_stride, + &buffer->bo, &buffer->offset); + + /* The GL 4.5 spec says: + * "If any enabled array’s buffer binding is zero when DrawArrays or + * one of the other drawing commands defined in section 10.4 is called, + * the result is undefined." + * + * In this case, let's the dst with undefined values + */ + if (src != NULL) { + if (dst_stride == src_stride) { + memcpy(dst, src, size); + } else { + while (count--) { + memcpy(dst, src, dst_stride); + src += src_stride; + dst += dst_stride; + } + } + } + buffer->stride = dst_stride; + buffer->size = size; +} + +static void +brw_prepare_vertices(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_VS_PROG_DATA */ + const struct brw_vs_prog_data *vs_prog_data = + brw_vs_prog_data(brw->vs.base.prog_data); + GLbitfield64 vs_inputs = vs_prog_data->inputs_read; + const unsigned char *ptr = NULL; + GLuint interleaved = 0; + unsigned int min_index = brw->vb.min_index + brw->basevertex; + unsigned int max_index = brw->vb.max_index + brw->basevertex; + unsigned i; + int delta, j; + + struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; + GLuint nr_uploads = 0; + + /* _NEW_POLYGON + * + * On gen6+, edge flags don't end up in the VUE (either in or out of the + * VS). Instead, they're uploaded as the last vertex element, and the data + * is passed sideband through the fixed function units. So, we need to + * prepare the vertex buffer for it, but it's not present in inputs_read. 
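A usage example of when this path triggers (illustrative GL calls, not from the patch): an unfilled polygon mode makes the clipper consume edge flags, so the check just below ORs the edge-flag bit into the inputs even though the VS itself never reads it:

   /* glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);   -> _NEW_POLYGON
    * glEdgeFlagPointer(0, flags);
    * glDrawArrays(GL_TRIANGLES, 0, n);
    *
    * vs_inputs |= VERT_BIT_EDGEFLAG, and the edge-flag array is
    * then uploaded as the last vertex element.
    */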
+ */ + if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL)) { + vs_inputs |= VERT_BIT_EDGEFLAG; + } + + if (0) + fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index); + + /* Accumulate the list of enabled arrays. */ + brw->vb.nr_enabled = 0; + while (vs_inputs) { + GLuint first = ffsll(vs_inputs) - 1; + assert (first < 64); + GLuint index = + first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read & + BITFIELD64_MASK(first)), 2); + struct brw_vertex_element *input = &brw->vb.inputs[index]; + input->is_dual_slot = (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0; + vs_inputs &= ~BITFIELD64_BIT(first); + if (input->is_dual_slot) + vs_inputs &= ~BITFIELD64_BIT(first + 1); + brw->vb.enabled[brw->vb.nr_enabled++] = input; + } + + if (brw->vb.nr_enabled == 0) + return; + + if (brw->vb.nr_buffers) + return; + + /* The range of data in a given buffer represented as [min, max) */ + struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX]; + uint32_t buffer_range_start[VERT_ATTRIB_MAX]; + uint32_t buffer_range_end[VERT_ATTRIB_MAX]; + + for (i = j = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; + const struct gl_vertex_array *glarray = input->glarray; + + if (_mesa_is_bufferobj(glarray->BufferObj)) { + struct intel_buffer_object *intel_buffer = + intel_buffer_object(glarray->BufferObj); + + const uint32_t offset = (uintptr_t)glarray->Ptr; + + /* Start with the worst case */ + uint32_t start = 0; + uint32_t range = intel_buffer->Base.Size; + if (glarray->InstanceDivisor) { + if (brw->num_instances) { + start = offset + glarray->StrideB * brw->baseinstance; + range = (glarray->StrideB * ((brw->num_instances - 1) / + glarray->InstanceDivisor) + + glarray->_ElementSize); + } + } else { + if (brw->vb.index_bounds_valid) { + start = offset + min_index * glarray->StrideB; + range = (glarray->StrideB * (max_index - min_index) + + glarray->_ElementSize); + } + } + + /* If we have a VB set to be uploaded for this buffer object + * already, reuse that VB state so that we emit fewer + * relocations. + */ + unsigned k; + for (k = 0; k < i; k++) { + const struct gl_vertex_array *other = brw->vb.enabled[k]->glarray; + if (glarray->BufferObj == other->BufferObj && + glarray->StrideB == other->StrideB && + glarray->InstanceDivisor == other->InstanceDivisor && + (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB) + { + input->buffer = brw->vb.enabled[k]->buffer; + input->offset = glarray->Ptr - other->Ptr; + + buffer_range_start[input->buffer] = + MIN2(buffer_range_start[input->buffer], start); + buffer_range_end[input->buffer] = + MAX2(buffer_range_end[input->buffer], start + range); + break; + } + } + if (k == i) { + struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; + + /* Named buffer object: Just reference its contents directly. */ + buffer->offset = offset; + buffer->stride = glarray->StrideB; + buffer->step_rate = glarray->InstanceDivisor; + buffer->size = glarray->BufferObj->Size - offset; + + enabled_buffer[j] = intel_buffer; + buffer_range_start[j] = start; + buffer_range_end[j] = start + range; + + input->buffer = j++; + input->offset = 0; + } + } else { + /* Queue the buffer object up to be uploaded in the next pass, + * when we've decided if we're doing interleaved or not. 
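As a complementary example to the NOTE further down (hypothetical client arrays): attributes laid out in one struct share a stride and stay within it, so they pass the interleave test and are copied with a single upload:

   /* struct vert { float pos[3]; float color[4]; } verts[N];  (28 bytes)
    * glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 28, verts[0].pos);
    * glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 28, verts[0].color);
    *
    * Attribute 1: same stride (28), ptr delta 12, and
    * 12 + _ElementSize (16) <= interleaved (28), so both attributes
    * remain on the interleaved path and share one VBO upload.
    */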
+
+   /* The range of data in a given buffer represented as [min, max) */
+   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
+   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
+   uint32_t buffer_range_end[VERT_ATTRIB_MAX];
+
+   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+      const struct gl_vertex_array *glarray = input->glarray;
+
+      if (_mesa_is_bufferobj(glarray->BufferObj)) {
+         struct intel_buffer_object *intel_buffer =
+            intel_buffer_object(glarray->BufferObj);
+
+         const uint32_t offset = (uintptr_t)glarray->Ptr;
+
+         /* Start with the worst case */
+         uint32_t start = 0;
+         uint32_t range = intel_buffer->Base.Size;
+         if (glarray->InstanceDivisor) {
+            if (brw->num_instances) {
+               start = offset + glarray->StrideB * brw->baseinstance;
+               range = (glarray->StrideB * ((brw->num_instances - 1) /
+                                            glarray->InstanceDivisor) +
+                        glarray->_ElementSize);
+            }
+         } else {
+            if (brw->vb.index_bounds_valid) {
+               start = offset + min_index * glarray->StrideB;
+               range = (glarray->StrideB * (max_index - min_index) +
+                        glarray->_ElementSize);
+            }
+         }
+
+         /* If we have a VB set to be uploaded for this buffer object
+          * already, reuse that VB state so that we emit fewer
+          * relocations.
+          */
+         unsigned k;
+         for (k = 0; k < i; k++) {
+            const struct gl_vertex_array *other = brw->vb.enabled[k]->glarray;
+            if (glarray->BufferObj == other->BufferObj &&
+                glarray->StrideB == other->StrideB &&
+                glarray->InstanceDivisor == other->InstanceDivisor &&
+                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
+            {
+               input->buffer = brw->vb.enabled[k]->buffer;
+               input->offset = glarray->Ptr - other->Ptr;
+
+               buffer_range_start[input->buffer] =
+                  MIN2(buffer_range_start[input->buffer], start);
+               buffer_range_end[input->buffer] =
+                  MAX2(buffer_range_end[input->buffer], start + range);
+               break;
+            }
+         }
+         if (k == i) {
+            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
+
+            /* Named buffer object: Just reference its contents directly. */
+            buffer->offset = offset;
+            buffer->stride = glarray->StrideB;
+            buffer->step_rate = glarray->InstanceDivisor;
+            buffer->size = glarray->BufferObj->Size - offset;
+
+            enabled_buffer[j] = intel_buffer;
+            buffer_range_start[j] = start;
+            buffer_range_end[j] = start + range;
+
+            input->buffer = j++;
+            input->offset = 0;
+         }
+      } else {
+         /* Queue the buffer object up to be uploaded in the next pass,
+          * when we've decided if we're doing interleaved or not.
+          */
+         if (nr_uploads == 0) {
+            interleaved = glarray->StrideB;
+            ptr = glarray->Ptr;
+         }
+         else if (interleaved != glarray->StrideB ||
+                  glarray->Ptr < ptr ||
+                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
+         {
+            /* If our stride is different from the first attribute's stride,
+             * or if the first attribute's stride didn't cover our element,
+             * disable the interleaved upload optimization.  The second case
+             * can most commonly occur in cases where there is a single
+             * vertex and, for example, the data is stored on the
+             * application's stack.
+             *
+             * NOTE: This will also disable the optimization in cases where
+             * the data is in a different order than the array indices.
+             * Something like:
+             *
+             *     float data[...];
+             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
+             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
+             */
+            interleaved = 0;
+         }
+
+         upload[nr_uploads++] = input;
+      }
+   }
+
+   /* Now that we've set up all of the buffers, we walk through and reference
+    * each of them.  We do this late so that we get the right size in each
+    * buffer and don't reference too little data.
+    */
+   for (i = 0; i < j; i++) {
+      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
+      if (buffer->bo)
+         continue;
+
+      const uint32_t start = buffer_range_start[i];
+      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];
+
+      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range);
+      brw_bo_reference(buffer->bo);
+   }
+
+   /* If we need to upload all the arrays, then we can trim those arrays to
+    * only the used elements [min_index, max_index] so long as we adjust all
+    * the values used in the 3DPRIMITIVE, i.e. by setting the vertex bias.
+    */
+   brw->vb.start_vertex_bias = 0;
+   delta = min_index;
+   if (nr_uploads == brw->vb.nr_enabled) {
+      brw->vb.start_vertex_bias = -delta;
+      delta = 0;
+   }
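For illustration (not part of the diff): the interleaving test above reduces
to a simple predicate on (ptr, stride, element size) against the first
client array. A sketch with hypothetical attr fields mirroring the ones the
driver consults:

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical mirror of the gl_vertex_array fields used here. */
    struct attr {
       const unsigned char *ptr;  /* client-memory pointer */
       unsigned stride;           /* StrideB */
       unsigned elem_size;        /* _ElementSize */
    };

    /* True if 'a' can share one interleaved upload with 'first': same
     * stride, and 'a' lies entirely inside the stride window that starts
     * at first->ptr. */
    static bool shares_interleave(const struct attr *first,
                                  const struct attr *a)
    {
       return a->stride == first->stride &&
              a->ptr >= first->ptr &&
              (uintptr_t)(a->ptr - first->ptr) + a->elem_size <= first->stride;
    }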
+
+   /* Handle any arrays to be uploaded. */
+   if (nr_uploads > 1) {
+      if (interleaved) {
+         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
+         /* All uploads are interleaved, so upload the arrays together as
+          * interleaved.  First, upload the contents and set up upload[0].
+          */
+         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
+                                 buffer, interleaved);
+         buffer->offset -= delta * interleaved;
+         buffer->size += delta * interleaved;
+
+         for (i = 0; i < nr_uploads; i++) {
+            /* Then, just point upload[i] at upload[0]'s buffer. */
+            upload[i]->offset =
+               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
+            upload[i]->buffer = j;
+         }
+         j++;
+
+         nr_uploads = 0;
+      }
+   }
+   /* Upload non-interleaved arrays */
+   for (i = 0; i < nr_uploads; i++) {
+      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
+      if (upload[i]->glarray->InstanceDivisor == 0) {
+         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
+                                 buffer, upload[i]->glarray->_ElementSize);
+      } else {
+         /* This is an instanced attribute, since its InstanceDivisor
+          * is not zero.  Therefore, its data will be stepped after the
+          * instanced draw has been run InstanceDivisor times.
+          */
+         uint32_t instanced_attr_max_index =
+            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
+         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
+                                 buffer, upload[i]->glarray->_ElementSize);
+      }
+      buffer->offset -= delta * buffer->stride;
+      buffer->size += delta * buffer->stride;
+      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
+      upload[i]->buffer = j++;
+      upload[i]->offset = 0;
+   }
+
+   brw->vb.nr_buffers = j;
+}
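For illustration (not part of the diff): the instanced upper bound is plain
integer division. With num_instances = 10 and InstanceDivisor = 4, instances
0-3 read element 0, 4-7 read element 1, and 8-9 read element 2, so
(10 - 1) / 4 = 2 is the last element needed:

    #include <stdio.h>

    int main(void)
    {
       unsigned num_instances = 10, divisor = 4;
       /* Last instanced element fetched; the upload copies [0, max]. */
       unsigned max = (num_instances - 1) / divisor;
       printf("upload %u elements (indices 0..%u)\n", max + 1, max);
       return 0;
    }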
+
+static const GLuint double_types_float[5] = {
+   0,
+   ISL_FORMAT_R64_FLOAT,
+   ISL_FORMAT_R64G64_FLOAT,
+   ISL_FORMAT_R64G64B64_FLOAT,
+   ISL_FORMAT_R64G64B64A64_FLOAT
+};
+
+static const GLuint double_types_passthru[5] = {
+   0,
+   ISL_FORMAT_R64_PASSTHRU,
+   ISL_FORMAT_R64G64_PASSTHRU,
+   ISL_FORMAT_R64G64B64_PASSTHRU,
+   ISL_FORMAT_R64G64B64A64_PASSTHRU
+};
+
+static const GLuint float_types[5] = {
+   0,
+   ISL_FORMAT_R32_FLOAT,
+   ISL_FORMAT_R32G32_FLOAT,
+   ISL_FORMAT_R32G32B32_FLOAT,
+   ISL_FORMAT_R32G32B32A32_FLOAT
+};
+
+static const GLuint half_float_types[5] = {
+   0,
+   ISL_FORMAT_R16_FLOAT,
+   ISL_FORMAT_R16G16_FLOAT,
+   ISL_FORMAT_R16G16B16_FLOAT,
+   ISL_FORMAT_R16G16B16A16_FLOAT
+};
+
+static const GLuint fixed_point_types[5] = {
+   0,
+   ISL_FORMAT_R32_SFIXED,
+   ISL_FORMAT_R32G32_SFIXED,
+   ISL_FORMAT_R32G32B32_SFIXED,
+   ISL_FORMAT_R32G32B32A32_SFIXED,
+};
+
+static const GLuint uint_types_direct[5] = {
+   0,
+   ISL_FORMAT_R32_UINT,
+   ISL_FORMAT_R32G32_UINT,
+   ISL_FORMAT_R32G32B32_UINT,
+   ISL_FORMAT_R32G32B32A32_UINT
+};
+
+static const GLuint uint_types_norm[5] = {
+   0,
+   ISL_FORMAT_R32_UNORM,
+   ISL_FORMAT_R32G32_UNORM,
+   ISL_FORMAT_R32G32B32_UNORM,
+   ISL_FORMAT_R32G32B32A32_UNORM
+};
+
+static const GLuint uint_types_scale[5] = {
+   0,
+   ISL_FORMAT_R32_USCALED,
+   ISL_FORMAT_R32G32_USCALED,
+   ISL_FORMAT_R32G32B32_USCALED,
+   ISL_FORMAT_R32G32B32A32_USCALED
+};
+
+static const GLuint int_types_direct[5] = {
+   0,
+   ISL_FORMAT_R32_SINT,
+   ISL_FORMAT_R32G32_SINT,
+   ISL_FORMAT_R32G32B32_SINT,
+   ISL_FORMAT_R32G32B32A32_SINT
+};
+
+static const GLuint int_types_norm[5] = {
+   0,
+   ISL_FORMAT_R32_SNORM,
+   ISL_FORMAT_R32G32_SNORM,
+   ISL_FORMAT_R32G32B32_SNORM,
+   ISL_FORMAT_R32G32B32A32_SNORM
+};
+
+static const GLuint int_types_scale[5] = {
+   0,
+   ISL_FORMAT_R32_SSCALED,
+   ISL_FORMAT_R32G32_SSCALED,
+   ISL_FORMAT_R32G32B32_SSCALED,
+   ISL_FORMAT_R32G32B32A32_SSCALED
+};
+
+static const GLuint ushort_types_direct[5] = {
+   0,
+   ISL_FORMAT_R16_UINT,
+   ISL_FORMAT_R16G16_UINT,
+   ISL_FORMAT_R16G16B16_UINT,
+   ISL_FORMAT_R16G16B16A16_UINT
+};
+
+static const GLuint ushort_types_norm[5] = {
+   0,
+   ISL_FORMAT_R16_UNORM,
+   ISL_FORMAT_R16G16_UNORM,
+   ISL_FORMAT_R16G16B16_UNORM,
+   ISL_FORMAT_R16G16B16A16_UNORM
+};
+
+static const GLuint ushort_types_scale[5] = {
+   0,
+   ISL_FORMAT_R16_USCALED,
+   ISL_FORMAT_R16G16_USCALED,
+   ISL_FORMAT_R16G16B16_USCALED,
+   ISL_FORMAT_R16G16B16A16_USCALED
+};
+
+static const GLuint short_types_direct[5] = {
+   0,
+   ISL_FORMAT_R16_SINT,
+   ISL_FORMAT_R16G16_SINT,
+   ISL_FORMAT_R16G16B16_SINT,
+   ISL_FORMAT_R16G16B16A16_SINT
+};
+
+static const GLuint short_types_norm[5] = {
+   0,
+   ISL_FORMAT_R16_SNORM,
+   ISL_FORMAT_R16G16_SNORM,
+   ISL_FORMAT_R16G16B16_SNORM,
+   ISL_FORMAT_R16G16B16A16_SNORM
+};
+
+static const GLuint short_types_scale[5] = {
+   0,
+   ISL_FORMAT_R16_SSCALED,
+   ISL_FORMAT_R16G16_SSCALED,
+   ISL_FORMAT_R16G16B16_SSCALED,
+   ISL_FORMAT_R16G16B16A16_SSCALED
+};
+
+static const GLuint ubyte_types_direct[5] = {
+   0,
+   ISL_FORMAT_R8_UINT,
+   ISL_FORMAT_R8G8_UINT,
+   ISL_FORMAT_R8G8B8_UINT,
+   ISL_FORMAT_R8G8B8A8_UINT
+};
+
+static const GLuint ubyte_types_norm[5] = {
+   0,
+   ISL_FORMAT_R8_UNORM,
+   ISL_FORMAT_R8G8_UNORM,
+   ISL_FORMAT_R8G8B8_UNORM,
+   ISL_FORMAT_R8G8B8A8_UNORM
+};
+
+static const GLuint ubyte_types_scale[5] = {
+   0,
+   ISL_FORMAT_R8_USCALED,
+   ISL_FORMAT_R8G8_USCALED,
+   ISL_FORMAT_R8G8B8_USCALED,
+   ISL_FORMAT_R8G8B8A8_USCALED
+};
+
+static const GLuint byte_types_direct[5] = {
+   0,
+   ISL_FORMAT_R8_SINT,
+   ISL_FORMAT_R8G8_SINT,
+   ISL_FORMAT_R8G8B8_SINT,
+   ISL_FORMAT_R8G8B8A8_SINT
+};
+
+static const GLuint byte_types_norm[5] = {
+   0,
+   ISL_FORMAT_R8_SNORM,
+   ISL_FORMAT_R8G8_SNORM,
+   ISL_FORMAT_R8G8B8_SNORM,
+   ISL_FORMAT_R8G8B8A8_SNORM
+};
+
+static const GLuint byte_types_scale[5] = {
+   0,
+   ISL_FORMAT_R8_SSCALED,
+   ISL_FORMAT_R8G8_SSCALED,
+   ISL_FORMAT_R8G8B8_SSCALED,
+   ISL_FORMAT_R8G8B8A8_SSCALED
+};
+
+static GLuint
+double_types(struct brw_context *brw,
+             int size,
+             GLboolean doubles)
+{
+   /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
+    * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
+    * 64-bit components are stored in the URB without any conversion."
+    * Also included on BDW PRM, Volume 7, page 470, table "Source Element
+    * Formats Supported in VF Unit"
+    *
+    * Previous PRMs don't include those references, so for gen7 we can't use
+    * PASSTHRU formats directly.  But in any case, we prefer to return
+    * passthru even in that case, because that reflects what we want to
+    * achieve, even if we would need a workaround on gen < 8.
+    */
+   return (doubles
+           ? double_types_passthru[size]
+           : double_types_float[size]);
+}
+
+/**
+ * Given vertex array type/size/format/normalized info, return
+ * the appropriate hardware surface type.
+ * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
+ */
+static unsigned
+brw_get_vertex_surface_type(struct brw_context *brw,
+                            const struct gl_vertex_array *glarray)
+{
+   int size = glarray->Size;
+   const bool is_ivybridge_or_older =
+      brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
+      fprintf(stderr, "type %s size %d normalized %d\n",
+              _mesa_enum_to_string(glarray->Type),
+              glarray->Size, glarray->Normalized);
+
+   if (glarray->Integer) {
+      assert(glarray->Format == GL_RGBA); /* sanity check */
+      switch (glarray->Type) {
+      case GL_INT: return int_types_direct[size];
+      case GL_SHORT:
+         if (is_ivybridge_or_older && size == 3)
+            return short_types_direct[4];
+         else
+            return short_types_direct[size];
+      case GL_BYTE:
+         if (is_ivybridge_or_older && size == 3)
+            return byte_types_direct[4];
+         else
+            return byte_types_direct[size];
+      case GL_UNSIGNED_INT: return uint_types_direct[size];
+      case GL_UNSIGNED_SHORT:
+         if (is_ivybridge_or_older && size == 3)
+            return ushort_types_direct[4];
+         else
+            return ushort_types_direct[size];
+      case GL_UNSIGNED_BYTE:
+         if (is_ivybridge_or_older && size == 3)
+            return ubyte_types_direct[4];
+         else
+            return ubyte_types_direct[size];
+      default: unreachable("not reached");
+      }
+   } else if (glarray->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
+      return ISL_FORMAT_R11G11B10_FLOAT;
+   } else if (glarray->Normalized) {
+      switch (glarray->Type) {
+      case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
+      case GL_FLOAT: return float_types[size];
+      case GL_HALF_FLOAT:
+      case GL_HALF_FLOAT_OES:
+         if (brw->gen < 6 && size == 3)
+            return half_float_types[4];
+         else
+            return half_float_types[size];
+      case GL_INT: return int_types_norm[size];
+      case GL_SHORT: return short_types_norm[size];
+      case GL_BYTE: return byte_types_norm[size];
+      case GL_UNSIGNED_INT: return uint_types_norm[size];
+      case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
+      case GL_UNSIGNED_BYTE:
+         if (glarray->Format == GL_BGRA) {
+            /* See GL_EXT_vertex_array_bgra */
+            assert(size == 4);
+            return ISL_FORMAT_B8G8R8A8_UNORM;
+         } else {
+            return ubyte_types_norm[size];
+         }
+      case GL_FIXED:
+         if (brw->gen >= 8 || brw->is_haswell)
+            return fixed_point_types[size];
+
+         /* This produces GL_FIXED inputs as values between INT32_MIN and
+          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
+          */
+         return int_types_scale[size];
+      /* See GL_ARB_vertex_type_2_10_10_10_rev.
+       * W/A: Pre-Haswell, the hardware doesn't really support the formats
+       * we'd like to use here, so upload everything as UINT and fix it in
+       * the shader.
+       */
+      case GL_INT_2_10_10_10_REV:
+         assert(size == 4);
+         if (brw->gen >= 8 || brw->is_haswell) {
+            return glarray->Format == GL_BGRA
+               ? ISL_FORMAT_B10G10R10A2_SNORM
+               : ISL_FORMAT_R10G10B10A2_SNORM;
+         }
+         return ISL_FORMAT_R10G10B10A2_UINT;
+      case GL_UNSIGNED_INT_2_10_10_10_REV:
+         assert(size == 4);
+         if (brw->gen >= 8 || brw->is_haswell) {
+            return glarray->Format == GL_BGRA
+               ? ISL_FORMAT_B10G10R10A2_UNORM
+               : ISL_FORMAT_R10G10B10A2_UNORM;
+         }
+         return ISL_FORMAT_R10G10B10A2_UINT;
+      default: unreachable("not reached");
+      }
+   } else {
+      /* See GL_ARB_vertex_type_2_10_10_10_rev.
+       * W/A: the hardware doesn't really support the formats we'd like to
+       * use here, so upload everything as UINT and fix it in the shader.
+       */
+      if (glarray->Type == GL_INT_2_10_10_10_REV) {
+         assert(size == 4);
+         if (brw->gen >= 8 || brw->is_haswell) {
+            return glarray->Format == GL_BGRA
+               ? ISL_FORMAT_B10G10R10A2_SSCALED
+               : ISL_FORMAT_R10G10B10A2_SSCALED;
+         }
+         return ISL_FORMAT_R10G10B10A2_UINT;
+      } else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
+         assert(size == 4);
+         if (brw->gen >= 8 || brw->is_haswell) {
+            return glarray->Format == GL_BGRA
+               ? ISL_FORMAT_B10G10R10A2_USCALED
+               : ISL_FORMAT_R10G10B10A2_USCALED;
+         }
+         return ISL_FORMAT_R10G10B10A2_UINT;
+      }
+      assert(glarray->Format == GL_RGBA); /* sanity check */
+      switch (glarray->Type) {
+      case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
+      case GL_FLOAT: return float_types[size];
+      case GL_HALF_FLOAT:
+      case GL_HALF_FLOAT_OES:
+         if (brw->gen < 6 && size == 3)
+            return half_float_types[4];
+         else
+            return half_float_types[size];
+      case GL_INT: return int_types_scale[size];
+      case GL_SHORT: return short_types_scale[size];
+      case GL_BYTE: return byte_types_scale[size];
+      case GL_UNSIGNED_INT: return uint_types_scale[size];
+      case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
+      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
+      case GL_FIXED:
+         if (brw->gen >= 8 || brw->is_haswell)
+            return fixed_point_types[size];
+
+         /* This produces GL_FIXED inputs as values between INT32_MIN and
+          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
+          */
+         return int_types_scale[size];
+      default: unreachable("not reached");
+      }
+   }
+}
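For illustration (not part of the diff): GL_FIXED is signed 16.16 fixed
point, so presenting it as SSCALED integers and multiplying by 1/65536 in
the VS recovers the intended value. A quick check:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
       /* 1.5 in 16.16 fixed point. */
       int32_t fixed = 3 << 15;  /* 98304 */
       /* SSCALED presents the raw integer to the VS unchanged; the
        * shader then scales it down by 1/65536. */
       float in_vs = (float)fixed * (1.0f / 65536.0f);
       printf("%d -> %f\n", fixed, in_vs);  /* 98304 -> 1.500000 */
       return 0;
    }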
+
+static void
+brw_prepare_shader_draw_parameters(struct brw_context *brw)
+{
+   const struct brw_vs_prog_data *vs_prog_data =
+      brw_vs_prog_data(brw->vs.base.prog_data);
+
+   /* For non-indirect draws, upload gl_BaseVertex. */
+   if ((vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) &&
+       brw->draw.draw_params_bo == NULL) {
+      intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
+                        &brw->draw.draw_params_bo,
+                        &brw->draw.draw_params_offset);
+   }
+
+   if (vs_prog_data->uses_drawid) {
+      intel_upload_data(brw, &brw->draw.gl_drawid,
+                        sizeof(brw->draw.gl_drawid), 4,
+                        &brw->draw.draw_id_bo,
+                        &brw->draw.draw_id_offset);
+   }
+}
+
+static void
+genX(emit_vertices)(struct brw_context *brw)
+{
+   uint32_t *dw;
+
+   brw_prepare_vertices(brw);
+   brw_prepare_shader_draw_parameters(brw);
+
+#if GEN_GEN < 8
+   brw_emit_query_begin(brw);
+#endif
+
+   const struct brw_vs_prog_data *vs_prog_data =
+      brw_vs_prog_data(brw->vs.base.prog_data);
+
+#if GEN_GEN >= 8
+   struct gl_context *ctx = &brw->ctx;
+   bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
+                          ctx->Polygon.BackMode != GL_FILL);
+
+   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
+      unsigned vue = brw->vb.nr_enabled;
+
+      /* The element for the edge flags must always be last, so we have to
+       * insert the SGVS before it in that case.
+       */
+      if (uses_edge_flag) {
+         assert(vue > 0);
+         vue--;
+      }
+
+      WARN_ONCE(vue >= 33,
+                "Trying to insert VID/IID past 33rd vertex element, "
+                "need to reorder the vertex attributes.");
+
+      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) {
+         if (vs_prog_data->uses_vertexid) {
+            vfs.VertexIDEnable = true;
+            vfs.VertexIDComponentNumber = 2;
+            vfs.VertexIDElementOffset = vue;
+         }
+
+         if (vs_prog_data->uses_instanceid) {
+            vfs.InstanceIDEnable = true;
+            vfs.InstanceIDComponentNumber = 3;
+            vfs.InstanceIDElementOffset = vue;
+         }
+      }
+
+      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
+         vfi.InstancingEnable = true;
+         vfi.VertexElementIndex = vue;
+      }
+   } else {
+      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs);
+   }
+
+   /* Normally we don't need an element for the SGVS attribute because the
+    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in
+    * an element that is past the list in 3DSTATE_VERTEX_ELEMENTS.  However
+    * if we're using draw parameters then we need an element for those
+    * values.  Additionally if there is an edge flag element then the SGVS
+    * can't be inserted past that so we need a dummy element to ensure that
+    * the edge flag is the last one.
+    */
+   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
+                                    vs_prog_data->uses_baseinstance ||
+                                    ((vs_prog_data->uses_instanceid ||
+                                      vs_prog_data->uses_vertexid)
+                                     && uses_edge_flag));
+#else
+   const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
+                                    vs_prog_data->uses_baseinstance ||
+                                    vs_prog_data->uses_instanceid ||
+                                    vs_prog_data->uses_vertexid);
+#endif
+   unsigned nr_elements =
+      brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
+
+#if GEN_GEN < 8
+   /* If any of the formats of vb.enabled needs more than one upload, we
+    * need to add it to nr_elements.
+    */
+   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
+
+      if (genX(uploads_needed(format)) > 1)
+         nr_elements++;
+   }
+#endif
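For illustration (not part of the diff): the draw-parameters upload is just
a pair of 32-bit values that the draw-params vertex element later reads as
R32G32_UINT with stride 0, so every vertex sees the same pair. The struct
below is a hypothetical mirror of that layout, not the driver's actual
definition:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical mirror of brw->draw.params: two dwords, uploaded with
     * 4-byte alignment; component 0 feeds gl_BaseVertex and component 1
     * feeds gl_BaseInstance. */
    struct draw_params {
       uint32_t gl_basevertex;
       uint32_t gl_baseinstance;
    };

    int main(void)
    {
       struct draw_params p = { .gl_basevertex = 42, .gl_baseinstance = 7 };
       printf("%zu bytes: basevertex=%u baseinstance=%u\n",
              sizeof(p), p.gl_basevertex, p.gl_baseinstance);
       return 0;
    }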
+
+   /* If the VS doesn't read any inputs (calculating vertex position from
+    * a state variable for some reason, for example), emit a single pad
+    * VERTEX_ELEMENT struct and bail.
+    *
+    * The stale VB state stays in place, but it won't do anything unless
+    * a VE loads from it.
+    */
+   if (nr_elements == 0) {
+      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
+                           1 + GENX(VERTEX_ELEMENT_STATE_length));
+      struct GENX(VERTEX_ELEMENT_STATE) elem = {
+         .Valid = true,
+         .SourceElementFormat = SF_R32G32B32A32_FLOAT,
+         .Component0Control = VFCOMP_STORE_0,
+         .Component1Control = VFCOMP_STORE_0,
+         .Component2Control = VFCOMP_STORE_0,
+         .Component3Control = VFCOMP_STORE_1_FP,
+      };
+      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem);
+      return;
+   }
+
+   /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
+   const bool uses_draw_params =
+      vs_prog_data->uses_basevertex ||
+      vs_prog_data->uses_baseinstance;
+   const unsigned nr_buffers = brw->vb.nr_buffers +
+      uses_draw_params + vs_prog_data->uses_drawid;
+
+   if (nr_buffers) {
+#if GEN_GEN >= 6
+      assert(nr_buffers <= 33);
+#else
+      assert(nr_buffers <= 17);
+#endif
+
+      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS),
+                           1 + GENX(VERTEX_BUFFER_STATE_length) * nr_buffers);
+
+      for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
+         const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
+         /* Prior to Haswell and Bay Trail we have to use 4-component
+          * formats to fake 3-component ones.  In particular, we do this
+          * for half-float and 8 and 16-bit integer formats.  This means
+          * that the vertex element may poke over the end of the buffer by
+          * 2 bytes.
+          */
+         unsigned padding =
+            (brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;
+         dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
+                                             buffer->offset,
+                                             buffer->offset + buffer->size +
+                                             padding,
+                                             buffer->stride,
+                                             buffer->step_rate);
+      }
+
+      if (uses_draw_params) {
+         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers,
+                                             brw->draw.draw_params_bo,
+                                             brw->draw.draw_params_offset,
+                                             brw->draw.draw_params_bo->size,
+                                             0 /* stride */,
+                                             0 /* step rate */);
+      }
+
+      if (vs_prog_data->uses_drawid) {
+         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
+                                             brw->draw.draw_id_bo,
+                                             brw->draw.draw_id_offset,
+                                             brw->draw.draw_id_bo->size,
+                                             0 /* stride */,
+                                             0 /* step rate */);
+      }
+   }
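For illustration (not part of the diff): the 2-byte figure comes from the
widest affected case, a 3-component 16-bit attribute at the very end of the
buffer, where the faked 4-component read fetches 8 bytes but only 6 exist.
A quick sanity check:

    #include <stdio.h>

    int main(void)
    {
       /* 3-component half-float/short attribute read as 4 components. */
       unsigned real_bytes  = 3 * 2;  /* what the buffer actually holds */
       unsigned faked_bytes = 4 * 2;  /* what the hardware will fetch   */
       printf("overfetch = %u bytes\n", faked_bytes - real_bytes);  /* 2 */
       return 0;
    }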
+
+   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
+    * presumably for VertexID/InstanceID.
+    */
+#if GEN_GEN >= 6
+   assert(nr_elements <= 34);
+   struct brw_vertex_element *gen6_edgeflag_input = NULL;
+#else
+   assert(nr_elements <= 18);
+#endif
+
+   dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
+                        1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements);
+   unsigned i;
+   for (i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
+      uint32_t comp0 = VFCOMP_STORE_SRC;
+      uint32_t comp1 = VFCOMP_STORE_SRC;
+      uint32_t comp2 = VFCOMP_STORE_SRC;
+      uint32_t comp3 = VFCOMP_STORE_SRC;
+      unsigned num_uploads = 1;
+
+#if GEN_GEN >= 8
+      /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
+       * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an
+       * element which has edge flag enabled."
+       */
+      assert(!(is_passthru_format(format) && uses_edge_flag));
+#endif
+
+      /* The gen4 driver expects edgeflag to come in as a float, and passes
+       * that float on to the tests in the clipper.  Mesa's current vertex
+       * attribute value for EdgeFlag is stored as a float, which works out.
+       * glEdgeFlagPointer, on the other hand, gives us an unnormalized
+       * integer ubyte.  Just rewrite that to convert to a float.
+       */
+      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
+#if GEN_GEN >= 6
+         /* Gen6+ passes edgeflag as sideband along with the vertex, instead
+          * of in the VUE.  We have to upload it sideband as the last vertex
+          * element according to the B-Spec.
+          */
+         gen6_edgeflag_input = input;
+         continue;
+#endif
+      }
+
+#if GEN_GEN < 8
+      num_uploads = genX(uploads_needed(format));
+#endif
+
+      for (unsigned c = 0; c < num_uploads; c++) {
+#if GEN_GEN < 8
+         uint32_t upload_format = downsize_format_if_needed(format, c);
+#endif
+         /* If we need more than one upload, the offset stride is 128 bits
+          * (16 bytes), as each previous upload consumes a full entry.
+          */
+         unsigned int offset = input->offset + c * 16;
+         int size = input->glarray->Size;
+
+#if GEN_GEN < 8
+         if (is_passthru_format(format))
+            size = upload_format_size(upload_format);
+#endif
+
+         switch (size) {
+         case 0: comp0 = VFCOMP_STORE_0; /* fallthrough */
+         case 1: comp1 = VFCOMP_STORE_0; /* fallthrough */
+         case 2: comp2 = VFCOMP_STORE_0; /* fallthrough */
+         case 3:
+#if GEN_GEN >= 8
+            if (input->glarray->Doubles) {
+               comp3 = VFCOMP_STORE_0;
+            } else
+#endif
+            if (input->glarray->Integer) {
+               comp3 = VFCOMP_STORE_1_INT;
+            } else {
+               comp3 = VFCOMP_STORE_1_FP;
+            }
+
+            break;
+         }
+
+#if GEN_GEN >= 8
+         /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE):
+          *
+          *     "When SourceElementFormat is set to one of the *64*_PASSTHRU
+          *     formats, 64-bit components are stored in the URB without any
+          *     conversion. In this case, vertex elements must be written as
+          *     128 or 256 bits, with VFCOMP_STORE_0 being used to pad the
+          *     output as required. E.g., if R64_PASSTHRU is used to copy a
+          *     64-bit Red component into the URB, Component 1 must be
+          *     specified as VFCOMP_STORE_0 (with Components 2,3 set to
+          *     VFCOMP_NOSTORE) in order to output a 128-bit vertex element,
+          *     or Components 1-3 must be specified as VFCOMP_STORE_0 in
+          *     order to output a 256-bit vertex element. Likewise, use of
+          *     R64G64B64_PASSTHRU requires Component 3 to be specified as
+          *     VFCOMP_STORE_0 in order to output a 256-bit vertex element."
+          */
+         if (input->glarray->Doubles && !input->is_dual_slot) {
+            /* Store vertex elements which correspond to double and dvec2
+             * vertex shader inputs as 128-bit vertex elements, instead of
+             * 256-bits.
+             */
+            comp2 = VFCOMP_NOSTORE;
+            comp3 = VFCOMP_NOSTORE;
+         }
+#endif
+
+         struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
+            .VertexBufferIndex = input->buffer,
+            .Valid = true,
+#if GEN_GEN < 8
+            .SourceElementFormat = upload_format,
+#else
+            .SourceElementFormat = format,
+#endif
+            .SourceElementOffset = offset,
+            .Component0Control = comp0,
+            .Component1Control = comp1,
+            .Component2Control = comp2,
+            .Component3Control = comp3,
+#if GEN_GEN < 6
+            .DestinationElementOffset = i * 4,
+#endif
+         };
+
+         GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
+         dw += GENX(VERTEX_ELEMENT_STATE_length);
+      }
+   }
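For illustration (not part of the diff): the intentional fallthrough in that
switch fills every component the source does not supply, defaulting w to 1
and the rest to 0. A table-style sketch of the result, using stand-in values
for the VFCOMP_* controls:

    #include <stdio.h>

    /* Stand-ins for the VFCOMP_* component controls. */
    enum { STORE_SRC, STORE_0, STORE_1 };

    int main(void)
    {
       /* For a non-integer, non-double attribute of each size, the switch
        * above yields (x, y, z, w) controls equivalent to: */
       for (int size = 0; size <= 4; size++) {
          int comp[4] = { STORE_SRC, STORE_SRC, STORE_SRC, STORE_SRC };
          for (int c = size; c < 3; c++)
             comp[c] = STORE_0;   /* missing x/y/z become 0 */
          if (size <= 3)
             comp[3] = STORE_1;   /* missing w becomes 1 */
          printf("size %d -> {%d,%d,%d,%d}\n",
                 size, comp[0], comp[1], comp[2], comp[3]);
       }
       return 0;
    }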
+
+   if (needs_sgvs_element) {
+      struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
+         .Valid = true,
+         .Component0Control = VFCOMP_STORE_0,
+         .Component1Control = VFCOMP_STORE_0,
+         .Component2Control = VFCOMP_STORE_0,
+         .Component3Control = VFCOMP_STORE_0,
+#if GEN_GEN < 6
+         .DestinationElementOffset = i * 4,
+#endif
+      };
+
+#if GEN_GEN >= 8
+      if (vs_prog_data->uses_basevertex ||
+          vs_prog_data->uses_baseinstance) {
+         elem_state.VertexBufferIndex = brw->vb.nr_buffers;
+         elem_state.SourceElementFormat = SF_R32G32_UINT;
+         elem_state.Component0Control = VFCOMP_STORE_SRC;
+         elem_state.Component1Control = VFCOMP_STORE_SRC;
+      }
+#else
+      elem_state.VertexBufferIndex = brw->vb.nr_buffers;
+      elem_state.SourceElementFormat = SF_R32G32_UINT;
+      if (vs_prog_data->uses_basevertex)
+         elem_state.Component0Control = VFCOMP_STORE_SRC;
+
+      if (vs_prog_data->uses_baseinstance)
+         elem_state.Component1Control = VFCOMP_STORE_SRC;
+
+      if (vs_prog_data->uses_vertexid)
+         elem_state.Component2Control = VFCOMP_STORE_VID;
+
+      if (vs_prog_data->uses_instanceid)
+         elem_state.Component3Control = VFCOMP_STORE_IID;
+#endif
+
+      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
+      dw += GENX(VERTEX_ELEMENT_STATE_length);
+   }
+
+   if (vs_prog_data->uses_drawid) {
+      struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
+         .Valid = true,
+         .VertexBufferIndex = brw->vb.nr_buffers + 1,
+         .SourceElementFormat = SF_R32_UINT,
+         .Component0Control = VFCOMP_STORE_SRC,
+         .Component1Control = VFCOMP_STORE_0,
+         .Component2Control = VFCOMP_STORE_0,
+         .Component3Control = VFCOMP_STORE_0,
+#if GEN_GEN < 6
+         .DestinationElementOffset = i * 4,
+#endif
+      };
+
+      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
+      dw += GENX(VERTEX_ELEMENT_STATE_length);
+   }
+
+#if GEN_GEN >= 6
+   if (gen6_edgeflag_input) {
+      uint32_t format =
+         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
+
+      struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
+         .Valid = true,
+         .VertexBufferIndex = gen6_edgeflag_input->buffer,
+         .EdgeFlagEnable = true,
+         .SourceElementFormat = format,
+         .SourceElementOffset = gen6_edgeflag_input->offset,
+         .Component0Control = VFCOMP_STORE_SRC,
+         .Component1Control = VFCOMP_STORE_0,
+         .Component2Control = VFCOMP_STORE_0,
+         .Component3Control = VFCOMP_STORE_0,
+      };
+
+      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state);
+      dw += GENX(VERTEX_ELEMENT_STATE_length);
+   }
+#endif
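For illustration (not part of the diff): because the edge-flag element is
deferred to the end, the per-element 3DSTATE_VF_INSTANCING loop that follows
cannot use the enabled-array index directly. A sketch of the remapping it
performs, with hypothetical counts:

    #include <stdio.h>

    int main(void)
    {
       /* Hypothetical: 3 enabled inputs, input 1 is the edge flag and is
        * emitted last among nr_elements = 3 elements. */
       int nr_enabled = 3, nr_elements = 3, edgeflag = 1;

       for (int i = 0, j = 0; i < nr_enabled; i++) {
          /* Mirrors the loop in genX(emit_vertices): the edge flag takes
           * the final element slot; everything else packs in order. */
          int element_index = (i == edgeflag) ? nr_elements - 1 : j++;
          printf("input %d -> element %d\n", i, element_index);
       }
       return 0;
    }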
+
+#if GEN_GEN >= 8
+   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
+      const struct brw_vertex_element *input = brw->vb.enabled[i];
+      const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
+      unsigned element_index;
+
+      /* The edge flag element is reordered to be the last one in the code
+       * above so we need to compensate for that in the element indices used
+       * below.
+       */
+      if (input == gen6_edgeflag_input)
+         element_index = nr_elements - 1;
+      else
+         element_index = j++;
+
+      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
+         vfi.VertexElementIndex = element_index;
+         vfi.InstancingEnable = buffer->step_rate ? true : false;
+         vfi.InstanceDataStepRate = buffer->step_rate;
+      }
+   }
+
+   if (vs_prog_data->uses_drawid) {
+      const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
+
+      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
+         vfi.VertexElementIndex = element;
+      }
+   }
+#endif
+}
+
+static const struct brw_tracked_state genX(vertices) = {
+   .dirty = {
+      .mesa = _NEW_POLYGON,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP |
+             BRW_NEW_VERTICES |
+             BRW_NEW_VS_PROG_DATA,
+   },
+   .emit = genX(emit_vertices),
+};
 
 #if GEN_GEN >= 6
 /**
@@ -1746,7 +2978,6 @@ static const struct brw_tracked_state genX(scissor_state) = {
    .emit = genX(upload_scissor_state),
 };
 
-#define GEN7_MOCS_L3 1
 #define upload_constant_state(brw, stage_state, active, opcode) \
    do { \
       uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0; \
@@ -3005,7 +4236,7 @@ genX(init_atoms)(struct brw_context *brw)
       &brw_drawing_rect,
       &brw_indices, /* must come before brw_vertices */
       &brw_index_buffer,
-      &brw_vertices,
+      &genX(vertices),
       &brw_constant_buffer
    };
 
@@ -3072,7 +4303,7 @@ genX(init_atoms)(struct brw_context *brw)
 
      &brw_indices, /* must come before brw_vertices */
      &brw_index_buffer,
-     &brw_vertices,
+     &genX(vertices),
   };
 #elif GEN_GEN == 7
   static const struct brw_tracked_state *render_atoms[] =
@@ -3160,7 +4391,7 @@ genX(init_atoms)(struct brw_context *brw)
 
      &brw_indices, /* must come before brw_vertices */
      &brw_index_buffer,
-     &brw_vertices,
+     &genX(vertices),
      &haswell_cut_index,
   };
@@ -3253,7 +4484,7 @@ genX(init_atoms)(struct brw_context *brw)
 
      &brw_indices,
      &gen8_index_buffer,
-     &gen8_vertices,
+     &genX(vertices),
      &haswell_cut_index,
      &gen8_pma_fix,
-- 
git-series 0.9.1