On Mon, May 01, 2017 at 06:43:23PM -0700, Rafael Antognolli wrote: > Some code that was placed in brw_draw_upload.c and exported to be used > by gen8+ was also moved to genX_state_upload, and the respective symbols > are not exported anymore. > > v2: > - Remove code from brw_draw_upload too > - Emit vertices for gen4-5 too. > - Use helper to setup brw_address (Kristian) > - Use macros for MOCS values. > - Do not use #ifndef NDEBUG on code that is actually used (Ken) > v3: > - Style and code cleanup (Ken) > - Keep some of the common code inside brw_draw_upload.c (Ken) > > Signed-off-by: Rafael Antognolli <rafael.antogno...@intel.com>
There are some formatting nits further down but comparing to original I couldn't spot anything really missing. All in all looks cleaner than before :) Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > --- > src/mesa/drivers/dri/i965/brw_draw_upload.c | 454 +--------------- > src/mesa/drivers/dri/i965/brw_state.h | 2 +- > src/mesa/drivers/dri/i965/gen8_draw_upload.c | 330 +----------- > src/mesa/drivers/dri/i965/genX_state_upload.c | 560 ++++++++++++++++++- > 4 files changed, 556 insertions(+), 790 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c > b/src/mesa/drivers/dri/i965/brw_draw_upload.c > index 7846293..8b30151 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c > +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c > @@ -242,86 +242,6 @@ double_types(struct brw_context *brw, > : double_types_float[size]); > } > > -static bool > -is_passthru_format(uint32_t format) > -{ > - switch (format) { > - case ISL_FORMAT_R64_PASSTHRU: > - case ISL_FORMAT_R64G64_PASSTHRU: > - case ISL_FORMAT_R64G64B64_PASSTHRU: > - case ISL_FORMAT_R64G64B64A64_PASSTHRU: > - return true; > - default: > - return false; > - } > -} > - > -static int > -uploads_needed(uint32_t format) > -{ > - if (!is_passthru_format(format)) > - return 1; > - > - switch (format) { > - case ISL_FORMAT_R64_PASSTHRU: > - case ISL_FORMAT_R64G64_PASSTHRU: > - return 1; > - case ISL_FORMAT_R64G64B64_PASSTHRU: > - case ISL_FORMAT_R64G64B64A64_PASSTHRU: > - return 2; > - default: > - unreachable("not reached"); > - } > -} > - > -/* > - * Returns the number of componentes associated with a format that is used on > - * a 64 to 32 format split. 
See downsize_format() > - */ > -static int > -upload_format_size(uint32_t upload_format) > -{ > - switch (upload_format) { > - case ISL_FORMAT_R32G32_FLOAT: > - return 2; > - case ISL_FORMAT_R32G32B32A32_FLOAT: > - return 4; > - default: > - unreachable("not reached"); > - } > -} > - > -/* > - * Returns the format that we are finally going to use when upload a vertex > - * element. It will only change if we are using *64*PASSTHRU formats, as for > - * gen < 8 they need to be splitted on two *32*FLOAT formats. > - * > - * @upload points in which upload we are. Valid values are [0,1] > - */ > -static uint32_t > -downsize_format_if_needed(uint32_t format, > - int upload) > -{ > - assert(upload == 0 || upload == 1); > - > - if (!is_passthru_format(format)) > - return format; > - > - switch (format) { > - case ISL_FORMAT_R64_PASSTHRU: > - return ISL_FORMAT_R32G32_FLOAT; > - case ISL_FORMAT_R64G64_PASSTHRU: > - return ISL_FORMAT_R32G32B32A32_FLOAT; > - case ISL_FORMAT_R64G64B64_PASSTHRU: > - return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT > - : ISL_FORMAT_R32G32_FLOAT; > - case ISL_FORMAT_R64G64B64A64_PASSTHRU: > - return ISL_FORMAT_R32G32B32A32_FLOAT; > - default: > - unreachable("not reached"); > - } > -} > - > /** > * Given vertex array type/size/format/normalized info, return > * the appopriate hardware surface type. > @@ -786,380 +706,6 @@ brw_prepare_shader_draw_parameters(struct brw_context > *brw) > } > } > > -/** > - * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS). > - */ > -uint32_t * > -brw_emit_vertex_buffer_state(struct brw_context *brw, > - unsigned buffer_nr, > - struct brw_bo *bo, > - unsigned start_offset, > - unsigned end_offset, > - unsigned stride, > - unsigned step_rate, > - uint32_t *__map) > -{ > - struct gl_context *ctx = &brw->ctx; > - uint32_t dw0; > - > - if (brw->gen >= 8) { > - dw0 = buffer_nr << GEN6_VB0_INDEX_SHIFT; > - } else if (brw->gen >= 6) { > - dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) | > - (step_rate ? 
GEN6_VB0_ACCESS_INSTANCEDATA > - : GEN6_VB0_ACCESS_VERTEXDATA); > - } else { > - dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) | > - (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA > - : BRW_VB0_ACCESS_VERTEXDATA); > - } > - > - if (brw->gen >= 7) > - dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; > - > - switch (brw->gen) { > - case 7: > - dw0 |= GEN7_MOCS_L3 << 16; > - break; > - case 8: > - dw0 |= BDW_MOCS_WB << 16; > - break; > - case 9: > - dw0 |= SKL_MOCS_WB << 16; > - break; > - } > - > - WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047), > - "VBO stride %d too large, bad rendering may occur\n", > - stride); > - OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT)); > - if (brw->gen >= 8) { > - OUT_RELOC64(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset); > - /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry - > - * Vertex Fetch (VF) Stage - State > - * > - * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x > - * VBState.BufferPitch", the address of the byte immediately beyond the > - * last valid byte of the buffer is determined by > - * "VBState.StartingBufferAddress + VBState.BufferSize". > - */ > - OUT_BATCH(end_offset - start_offset); > - } else if (brw->gen >= 5) { > - OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset); > - /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry - > - * Vertex Fetch (VF) Stage - State > - * > - * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x > - * VBState.BufferPitch", the address of the byte immediately beyond > the > - * last valid byte of the buffer is determined by > - * "VBState.EndAddress + 1". 
> - */ > - OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, end_offset - 1); > - OUT_BATCH(step_rate); > - } else { > - OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset); > - OUT_BATCH(0); > - OUT_BATCH(step_rate); > - } > - > - return __map; > -} > - > -static void > -brw_emit_vertices(struct brw_context *brw) > -{ > - GLuint i; > - > - brw_prepare_vertices(brw); > - brw_prepare_shader_draw_parameters(brw); > - > - brw_emit_query_begin(brw); > - > - const struct brw_vs_prog_data *vs_prog_data = > - brw_vs_prog_data(brw->vs.base.prog_data); > - > - unsigned nr_elements = brw->vb.nr_enabled; > - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || > - vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) > - ++nr_elements; > - if (vs_prog_data->uses_drawid) > - nr_elements++; > - > - /* If any of the formats of vb.enabled needs more that one upload, we need > - * to add it to nr_elements */ > - unsigned extra_uploads = 0; > - for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { > - struct brw_vertex_element *input = brw->vb.enabled[i]; > - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); > - > - if (uploads_needed(format) > 1) > - extra_uploads++; > - } > - nr_elements += extra_uploads; > - > - /* If the VS doesn't read any inputs (calculating vertex position from > - * a state variable for some reason, for example), emit a single pad > - * VERTEX_ELEMENT struct and bail. > - * > - * The stale VB state stays in place, but they don't do anything unless > - * a VE loads from them. 
> - */ > - if (nr_elements == 0) { > - BEGIN_BATCH(3); > - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); > - if (brw->gen >= 6) { > - OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | > - GEN6_VE0_VALID | > - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | > - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); > - } else { > - OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | > - BRW_VE0_VALID | > - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | > - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); > - } > - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); > - ADVANCE_BATCH(); > - return; > - } > - > - /* Now emit VB and VEP state packets. > - */ > - > - const bool uses_draw_params = > - vs_prog_data->uses_basevertex || > - vs_prog_data->uses_baseinstance; > - const unsigned nr_buffers = brw->vb.nr_buffers + > - uses_draw_params + vs_prog_data->uses_drawid; > - > - if (nr_buffers) { > - if (brw->gen >= 6) { > - assert(nr_buffers <= 33); > - } else { > - assert(nr_buffers <= 17); > - } > - > - BEGIN_BATCH(1 + 4 * nr_buffers); > - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); > - for (i = 0; i < brw->vb.nr_buffers; i++) { > - struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; > - /* Prior to Haswell and Bay Trail we have to use 4-component formats > - * to fake 3-component ones. In particular, we do this for > - * half-float and 8 and 16-bit integer formats. This means that the > - * vertex element may poke over the end of the buffer by 2 bytes. 
> - */ > - unsigned padding = > - (brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2; > - EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->offset, > - buffer->offset + buffer->size + padding, > - buffer->stride, buffer->step_rate); > - > - } > - > - if (uses_draw_params) { > - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, > - brw->draw.draw_params_bo, > - brw->draw.draw_params_offset, > - brw->draw.draw_params_bo->size, > - 0, /* stride */ > - 0); /* step rate */ > - } > - > - if (vs_prog_data->uses_drawid) { > - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, > - brw->draw.draw_id_bo, > - brw->draw.draw_id_offset, > - brw->draw.draw_id_bo->size, > - 0, /* stride */ > - 0); /* step rate */ > - } > - > - ADVANCE_BATCH(); > - } > - > - /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, > presumably > - * for VertexID/InstanceID. > - */ > - if (brw->gen >= 6) { > - assert(nr_elements <= 34); > - } else { > - assert(nr_elements <= 18); > - } > - > - struct brw_vertex_element *gen6_edgeflag_input = NULL; > - > - BEGIN_BATCH(1 + nr_elements * 2); > - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); > - for (i = 0; i < brw->vb.nr_enabled; i++) { > - struct brw_vertex_element *input = brw->vb.enabled[i]; > - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); > - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; > - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; > - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; > - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; > - unsigned num_uploads = 1; > - unsigned c; > - > - num_uploads = uploads_needed(format); > - > - if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { > - /* Gen6+ passes edgeflag as sideband along with the vertex, instead > - * of in the VUE. We have to upload it sideband as the last vertex > - * element according to the B-Spec. 
> - */ > - if (brw->gen >= 6) { > - gen6_edgeflag_input = input; > - continue; > - } > - } > - > - for (c = 0; c < num_uploads; c++) { > - uint32_t upload_format = downsize_format_if_needed(format, c); > - /* If we need more that one upload, the offset stride would be 128 > - * bits (16 bytes), as for previous uploads we are using the full > - * entry. */ > - unsigned int offset = input->offset + c * 16; > - int size = input->glarray->Size; > - > - if (is_passthru_format(format)) > - size = upload_format_size(upload_format); > - > - switch (size) { > - case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; > - case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; > - case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; > - case 3: comp3 = input->glarray->Integer > - ? BRW_VE1_COMPONENT_STORE_1_INT > - : BRW_VE1_COMPONENT_STORE_1_FLT; > - break; > - } > - > - if (brw->gen >= 6) { > - OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | > - GEN6_VE0_VALID | > - (upload_format << BRW_VE0_FORMAT_SHIFT) | > - (offset << BRW_VE0_SRC_OFFSET_SHIFT)); > - } else { > - OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | > - BRW_VE0_VALID | > - (upload_format << BRW_VE0_FORMAT_SHIFT) | > - (offset << BRW_VE0_SRC_OFFSET_SHIFT)); > - } > - > - if (brw->gen >= 5) > - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | > - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | > - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | > - (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); > - else > - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | > - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | > - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | > - (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | > - ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); > - } > - } > - > - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || > - vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) { > - uint32_t dw0 = 0, dw1 = 0; > - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0; > - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0; > - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0; > - uint32_t 
comp3 = BRW_VE1_COMPONENT_STORE_0; > - > - if (vs_prog_data->uses_basevertex) > - comp0 = BRW_VE1_COMPONENT_STORE_SRC; > - > - if (vs_prog_data->uses_baseinstance) > - comp1 = BRW_VE1_COMPONENT_STORE_SRC; > - > - if (vs_prog_data->uses_vertexid) > - comp2 = BRW_VE1_COMPONENT_STORE_VID; > - > - if (vs_prog_data->uses_instanceid) > - comp3 = BRW_VE1_COMPONENT_STORE_IID; > - > - dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) | > - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | > - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | > - (comp3 << BRW_VE1_COMPONENT_3_SHIFT); > - > - if (brw->gen >= 6) { > - dw0 |= GEN6_VE0_VALID | > - brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | > - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT; > - } else { > - dw0 |= BRW_VE0_VALID | > - brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT | > - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT; > - dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; > - } > - > - /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values, > - * the format is ignored and the value is always int. 
> - */ > - > - OUT_BATCH(dw0); > - OUT_BATCH(dw1); > - } > - > - if (vs_prog_data->uses_drawid) { > - uint32_t dw0 = 0, dw1 = 0; > - > - dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); > - > - if (brw->gen >= 6) { > - dw0 |= GEN6_VE0_VALID | > - ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | > - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); > - } else { > - dw0 |= BRW_VE0_VALID | > - ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) | > - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); > - > - dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; > - } > - > - OUT_BATCH(dw0); > - OUT_BATCH(dw1); > - } > - > - if (brw->gen >= 6 && gen6_edgeflag_input) { > - uint32_t format = > - brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); > - > - OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | > - GEN6_VE0_VALID | > - GEN6_VE0_EDGE_FLAG_ENABLE | > - (format << BRW_VE0_FORMAT_SHIFT) | > - (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); > - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); > - } > - > - ADVANCE_BATCH(); > -} > - > -const struct brw_tracked_state brw_vertices = { > - .dirty = { > - .mesa = _NEW_POLYGON, > - .brw = BRW_NEW_BATCH | > - BRW_NEW_BLORP | > - BRW_NEW_VERTICES | > - BRW_NEW_VS_PROG_DATA, > - }, > - .emit = brw_emit_vertices, > -}; > - > static void > brw_upload_indices(struct brw_context *brw) > { > diff --git a/src/mesa/drivers/dri/i965/brw_state.h > b/src/mesa/drivers/dri/i965/brw_state.h > index 084f97f..acb7334 100644 > --- a/src/mesa/drivers/dri/i965/brw_state.h > +++ 
b/src/mesa/drivers/dri/i965/brw_state.h > @@ -103,7 +103,6 @@ extern const struct brw_tracked_state brw_psp_urb_cbs; > > extern const struct brw_tracked_state brw_drawing_rect; > extern const struct brw_tracked_state brw_indices; > -extern const struct brw_tracked_state brw_vertices; > extern const struct brw_tracked_state brw_index_buffer; > extern const struct brw_tracked_state brw_cs_state; > extern const struct brw_tracked_state gen7_cs_push_constants; > @@ -125,7 +124,6 @@ extern const struct brw_tracked_state haswell_cut_index; > extern const struct brw_tracked_state gen8_index_buffer; > extern const struct brw_tracked_state gen8_multisample_state; > extern const struct brw_tracked_state gen8_pma_fix; > -extern const struct brw_tracked_state gen8_vertices; > extern const struct brw_tracked_state gen8_vf_topology; > extern const struct brw_tracked_state brw_cs_work_groups_surface; > > diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c > b/src/mesa/drivers/dri/i965/gen8_draw_upload.c > index e81cca9..8db160b 100644 > --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c > +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c > @@ -34,336 +34,6 @@ > #include "intel_batchbuffer.h" > #include "intel_buffer_objects.h" > > -#ifndef NDEBUG > -static bool > -is_passthru_format(uint32_t format) > -{ > - switch (format) { > - case ISL_FORMAT_R64_PASSTHRU: > - case ISL_FORMAT_R64G64_PASSTHRU: > - case ISL_FORMAT_R64G64B64_PASSTHRU: > - case ISL_FORMAT_R64G64B64A64_PASSTHRU: > - return true; > - default: > - return false; > - } > -} > -#endif > - > -static void > -gen8_emit_vertices(struct brw_context *brw) > -{ > - struct gl_context *ctx = &brw->ctx; > - bool uses_edge_flag; > - > - brw_prepare_vertices(brw); > - brw_prepare_shader_draw_parameters(brw); > - > - uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || > - ctx->Polygon.BackMode != GL_FILL); > - > - const struct brw_vs_prog_data *vs_prog_data = > - brw_vs_prog_data(brw->vs.base.prog_data); > - > - if 
(vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { > - unsigned vue = brw->vb.nr_enabled; > - > - /* The element for the edge flags must always be last, so we have to > - * insert the SGVS before it in that case. > - */ > - if (uses_edge_flag) { > - assert(vue > 0); > - vue--; > - } > - > - WARN_ONCE(vue >= 33, > - "Trying to insert VID/IID past 33rd vertex element, " > - "need to reorder the vertex attrbutes."); > - > - unsigned dw1 = 0; > - if (vs_prog_data->uses_vertexid) { > - dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID | > - (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) | /* .z channel > */ > - (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT); > - } > - > - if (vs_prog_data->uses_instanceid) { > - dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID | > - (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel > */ > - (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT); > - } > - > - BEGIN_BATCH(2); > - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); > - OUT_BATCH(dw1); > - ADVANCE_BATCH(); > - > - BEGIN_BATCH(3); > - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); > - OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE); > - OUT_BATCH(0); > - ADVANCE_BATCH(); > - } else { > - BEGIN_BATCH(2); > - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); > - OUT_BATCH(0); > - ADVANCE_BATCH(); > - } > - > - /* Normally we don't need an element for the SGVS attribute because the > - * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in > an > - * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if > - * we're using draw parameters then we need an element for the those > - * values. Additionally if there is an edge flag element then the SGVS > - * can't be inserted past that so we need a dummy element to ensure that > - * the edge flag is the last one. 
> - */ > - const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || > - vs_prog_data->uses_baseinstance || > - ((vs_prog_data->uses_instanceid || > - vs_prog_data->uses_vertexid) && > - uses_edge_flag)); > - const unsigned nr_elements = > - brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; > - > - /* If the VS doesn't read any inputs (calculating vertex position from > - * a state variable for some reason, for example), emit a single pad > - * VERTEX_ELEMENT struct and bail. > - * > - * The stale VB state stays in place, but they don't do anything unless > - * a VE loads from them. > - */ > - if (nr_elements == 0) { > - BEGIN_BATCH(3); > - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (3 - 2)); > - OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | > - GEN6_VE0_VALID | > - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | > - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); > - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_1_FLT << > BRW_VE1_COMPONENT_3_SHIFT)); > - ADVANCE_BATCH(); > - return; > - } > - > - /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. 
*/ > - const bool uses_draw_params = > - vs_prog_data->uses_basevertex || > - vs_prog_data->uses_baseinstance; > - const unsigned nr_buffers = brw->vb.nr_buffers + > - uses_draw_params + vs_prog_data->uses_drawid; > - > - if (nr_buffers) { > - assert(nr_buffers <= 33); > - > - BEGIN_BATCH(1 + 4 * nr_buffers); > - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); > - for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { > - const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; > - EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, > - buffer->offset, > - buffer->offset + buffer->size, > - buffer->stride, 0 /* unused */); > - } > - > - if (uses_draw_params) { > - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, > - brw->draw.draw_params_bo, > - brw->draw.draw_params_offset, > - brw->draw.draw_params_bo->size, > - 0 /* stride */, > - 0 /* unused */); > - } > - > - if (vs_prog_data->uses_drawid) { > - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, > - brw->draw.draw_id_bo, > - brw->draw.draw_id_offset, > - brw->draw.draw_id_bo->size, > - 0 /* stride */, > - 0 /* unused */); > - } > - ADVANCE_BATCH(); > - } > - > - /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, > - * presumably for VertexID/InstanceID. 
> - */ > - assert(nr_elements <= 34); > - > - struct brw_vertex_element *gen6_edgeflag_input = NULL; > - > - BEGIN_BATCH(1 + nr_elements * 2); > - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); > - for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { > - struct brw_vertex_element *input = brw->vb.enabled[i]; > - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); > - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; > - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; > - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; > - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; > - > - /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): > - * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an > - * element which has edge flag enabled." > - */ > - assert(!(is_passthru_format(format) && uses_edge_flag)); > - > - /* The gen4 driver expects edgeflag to come in as a float, and passes > - * that float on to the tests in the clipper. Mesa's current vertex > - * attribute value for EdgeFlag is stored as a float, which works out. > - * glEdgeFlagPointer, on the other hand, gives us an unnormalized > - * integer ubyte. Just rewrite that to convert to a float. > - */ > - if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { > - /* Gen6+ passes edgeflag as sideband along with the vertex, instead > - * of in the VUE. We have to upload it sideband as the last vertex > - * element according to the B-Spec. 
> - */ > - gen6_edgeflag_input = input; > - continue; > - } > - > - switch (input->glarray->Size) { > - case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; > - case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; > - case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; > - case 3: > - if (input->glarray->Doubles) { > - comp3 = BRW_VE1_COMPONENT_STORE_0; > - } else if (input->glarray->Integer) { > - comp3 = BRW_VE1_COMPONENT_STORE_1_INT; > - } else { > - comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; > - } > - > - break; > - } > - > - /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): > - * > - * "When SourceElementFormat is set to one of the *64*_PASSTHRU > - * formats, 64-bit components are stored in the URB without any > - * conversion. In this case, vertex elements must be written as 128 > - * or 256 bits, with VFCOMP_STORE_0 being used to pad the output > - * as required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red > - * component into the URB, Component 1 must be specified as > - * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) > - * in order to output a 128-bit vertex element, or Components 1-3 > must > - * be specified as VFCOMP_STORE_0 in order to output a 256-bit > vertex > - * element. Likewise, use of R64G64B64_PASSTHRU requires Component > 3 > - * to be specified as VFCOMP_STORE_0 in order to output a 256-bit > vertex > - * element." > - */ > - if (input->glarray->Doubles && !input->is_dual_slot) { > - /* Store vertex elements which correspond to double and dvec2 vertex > - * shader inputs as 128-bit vertex elements, instead of 256-bits. 
> - */ > - comp2 = BRW_VE1_COMPONENT_NOSTORE; > - comp3 = BRW_VE1_COMPONENT_NOSTORE; > - } > - > - OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | > - GEN6_VE0_VALID | > - (format << BRW_VE0_FORMAT_SHIFT) | > - (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); > - > - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | > - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | > - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | > - (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); > - } > - > - if (needs_sgvs_element) { > - if (vs_prog_data->uses_basevertex || > - vs_prog_data->uses_baseinstance) { > - OUT_BATCH(GEN6_VE0_VALID | > - brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | > - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT); > - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << > BRW_VE1_COMPONENT_0_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_SRC << > BRW_VE1_COMPONENT_1_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); > - } else { > - OUT_BATCH(GEN6_VE0_VALID); > - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); > - } > - } > - > - if (vs_prog_data->uses_drawid) { > - OUT_BATCH(GEN6_VE0_VALID | > - ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | > - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT)); > - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); > - } > - > - if (gen6_edgeflag_input) { > - uint32_t format = > - brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); > - > - OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | > - GEN6_VE0_VALID | > - GEN6_VE0_EDGE_FLAG_ENABLE | > - 
(format << BRW_VE0_FORMAT_SHIFT) | > - (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); > - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); > - } > - ADVANCE_BATCH(); > - > - for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { > - const struct brw_vertex_element *input = brw->vb.enabled[i]; > - const struct brw_vertex_buffer *buffer = > &brw->vb.buffers[input->buffer]; > - unsigned element_index; > - > - /* The edge flag element is reordered to be the last one in the code > - * above so we need to compensate for that in the element indices used > - * below. > - */ > - if (input == gen6_edgeflag_input) > - element_index = nr_elements - 1; > - else > - element_index = j++; > - > - BEGIN_BATCH(3); > - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); > - OUT_BATCH(element_index | > - (buffer->step_rate ? 
GEN8_VF_INSTANCING_ENABLE : 0)); > - OUT_BATCH(buffer->step_rate); > - ADVANCE_BATCH(); > - } > - > - if (vs_prog_data->uses_drawid) { > - const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; > - BEGIN_BATCH(3); > - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); > - OUT_BATCH(element); > - OUT_BATCH(0); > - ADVANCE_BATCH(); > - } > -} > - > -const struct brw_tracked_state gen8_vertices = { > - .dirty = { > - .mesa = _NEW_POLYGON, > - .brw = BRW_NEW_BATCH | > - BRW_NEW_BLORP | > - BRW_NEW_VERTICES | > - BRW_NEW_VS_PROG_DATA, > - }, > - .emit = gen8_emit_vertices, > -}; > - > static void > gen8_emit_index_buffer(struct brw_context *brw) > { > diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c > b/src/mesa/drivers/dri/i965/genX_state_upload.c > index 3e6ffbd..df05b51 100644 > --- a/src/mesa/drivers/dri/i965/genX_state_upload.c > +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c > @@ -26,10 +26,16 @@ > #include "common/gen_device_info.h" > #include "genxml/gen_macros.h" > > +#include "main/bufferobj.h" > +#include "main/context.h" > +#include "main/enums.h" > +#include "main/macros.h" > + > #include "brw_context.h" > #if GEN_GEN == 6 > #include "brw_defines.h" > #endif > +#include "brw_draw.h" > #include "brw_state.h" > #include "brw_wm.h" > #include "brw_util.h" > @@ -125,6 +131,17 @@ instruction_bo(struct brw_bo *bo, uint32_t offset) > }; > } > > +static inline struct brw_address > +vertex_bo(struct brw_bo *bo, uint32_t offset) > +{ > + return (struct brw_address) { > + .bo = bo, > + .offset = offset, > + .read_domains = I915_GEM_DOMAIN_VERTEX, > + .write_domain = 0, > + }; > +} > + > #include "genxml/genX_pack.h" > > #define _brw_cmd_length(cmd) cmd ## _length > @@ -158,6 +175,541 @@ instruction_bo(struct brw_bo *bo, uint32_t offset) > _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ > _dst = NULL) > > +static uint32_t * > +genX(emit_vertex_buffer_state)(struct brw_context *brw, > + uint32_t *dw, > + unsigned buffer_nr, > + struct brw_bo 
*bo, > + unsigned start_offset, > + unsigned end_offset, > + unsigned stride, > + unsigned step_rate) > +{ > + struct GENX(VERTEX_BUFFER_STATE) buf_state = { > + .VertexBufferIndex = buffer_nr, > + .BufferPitch = stride, > + .BufferStartingAddress = vertex_bo(bo, start_offset), > +#if GEN_GEN >= 8 > + .BufferSize = end_offset - start_offset, > +#endif > + > +#if GEN_GEN >= 7 > + .AddressModifyEnable = true, > +#endif > + > +#if GEN_GEN < 8 > + .BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA, > + .InstanceDataStepRate = step_rate, > +#if GEN_GEN >= 5 > + .EndAddress = vertex_bo(bo, end_offset - 1), > +#endif > +#endif > + > +#if GEN_GEN == 9 > + .VertexBufferMOCS = SKL_MOCS_WB, > +#elif GEN_GEN == 8 > + .VertexBufferMOCS = BDW_MOCS_WB, > +#elif GEN_GEN == 7 > + .VertexBufferMOCS = GEN7_MOCS_L3, > +#endif > + }; > + > + GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &buf_state); > + return dw + GENX(VERTEX_BUFFER_STATE_length); > +} > + > +UNUSED static bool > +is_passthru_format(uint32_t format) > +{ > + switch (format) { > + case ISL_FORMAT_R64_PASSTHRU: > + case ISL_FORMAT_R64G64_PASSTHRU: > + case ISL_FORMAT_R64G64B64_PASSTHRU: > + case ISL_FORMAT_R64G64B64A64_PASSTHRU: > + return true; > + default: > + return false; > + } > +} > + > +UNUSED static int > +genX(uploads_needed)(uint32_t format) As you mark this with UNUSED, does this need to be genX()-wrapped? > +{ > + if (!is_passthru_format(format)) > + return 1; > + > + switch (format) { > + case ISL_FORMAT_R64_PASSTHRU: > + case ISL_FORMAT_R64G64_PASSTHRU: > + return 1; > + case ISL_FORMAT_R64G64B64_PASSTHRU: > + case ISL_FORMAT_R64G64B64A64_PASSTHRU: > + return 2; > + default: > + unreachable("not reached"); > + } > +} > + > +/* > + * Returns the format that we are finally going to use when upload a vertex > + * element. It will only change if we are using *64*PASSTHRU formats, as for > + * gen < 8 they need to be splitted on two *32*FLOAT formats. > + * > + * @upload points in which upload we are. 
Valid values are [0,1] > + */ > +static uint32_t > +downsize_format_if_needed(uint32_t format, > + int upload) > +{ > + assert(upload == 0 || upload == 1); > + > + if (!is_passthru_format(format)) > + return format; > + > + switch (format) { > + case ISL_FORMAT_R64_PASSTHRU: > + return ISL_FORMAT_R32G32_FLOAT; > + case ISL_FORMAT_R64G64_PASSTHRU: > + return ISL_FORMAT_R32G32B32A32_FLOAT; > + case ISL_FORMAT_R64G64B64_PASSTHRU: > + return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT > + : ISL_FORMAT_R32G32_FLOAT; > + case ISL_FORMAT_R64G64B64A64_PASSTHRU: > + return ISL_FORMAT_R32G32B32A32_FLOAT; > + default: > + unreachable("not reached"); > + } > +} > + > +/* > + * Returns the number of componentes associated with a format that is used on > + * a 64 to 32 format split. See downsize_format() > + */ > +static int > +upload_format_size(uint32_t upload_format) > +{ > + switch (upload_format) { > + case ISL_FORMAT_R32G32_FLOAT: > + return 2; > + case ISL_FORMAT_R32G32B32A32_FLOAT: > + return 4; > + default: > + unreachable("not reached"); > + } > +} > + > +static void > +genX(emit_vertices)(struct brw_context *brw) > +{ > + uint32_t *dw; > + > + brw_prepare_vertices(brw); > + brw_prepare_shader_draw_parameters(brw); > + > +#if GEN_GEN < 6 > + brw_emit_query_begin(brw); > +#endif > + > + const struct brw_vs_prog_data *vs_prog_data = > + brw_vs_prog_data(brw->vs.base.prog_data); > + > +#if GEN_GEN >= 8 > + struct gl_context *ctx = &brw->ctx; > + bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || Could be const. > + ctx->Polygon.BackMode != GL_FILL); > + > + if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { > + unsigned vue = brw->vb.nr_enabled; > + > + /* The element for the edge flags must always be last, so we have to > + * insert the SGVS before it in that case. 
> + */ > + if (uses_edge_flag) { > + assert(vue > 0); > + vue--; > + } > + > + WARN_ONCE(vue >= 33, > + "Trying to insert VID/IID past 33rd vertex element, " > + "need to reorder the vertex attrbutes."); > + > + brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) { > + if (vs_prog_data->uses_vertexid) { > + vfs.VertexIDEnable = true; > + vfs.VertexIDComponentNumber = 2; > + vfs.VertexIDElementOffset = vue; > + } > + > + if (vs_prog_data->uses_instanceid) { > + vfs.InstanceIDEnable = true; > + vfs.InstanceIDComponentNumber = 3; > + vfs.InstanceIDElementOffset = vue; > + } > + } > + > + brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { > + vfi.InstancingEnable = true; > + vfi.VertexElementIndex = vue; > + } > + } else { > + brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs); > + } > + > + /* Normally we don't need an element for the SGVS attribute because the > + * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in > an > + * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if > + * we're using draw parameters then we need an element for the those > + * values. Additionally if there is an edge flag element then the SGVS > + * can't be inserted past that so we need a dummy element to ensure that > + * the edge flag is the last one. 
> + */ > + const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || > + vs_prog_data->uses_baseinstance || > + ((vs_prog_data->uses_instanceid || > + vs_prog_data->uses_vertexid) > + && uses_edge_flag)); > +#else > + const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || > + vs_prog_data->uses_baseinstance || > + vs_prog_data->uses_instanceid || > + vs_prog_data->uses_vertexid); > +#endif > + unsigned nr_elements = > + brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; > + > +#if GEN_GEN < 8 > + /* If any of the formats of vb.enabled needs more that one upload, we need > + * to add it to nr_elements > + */ > + for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { > + struct brw_vertex_element *input = brw->vb.enabled[i]; > + uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); > + > + if (genX(uploads_needed(format)) > 1) > + nr_elements++; > + } > +#endif > + > + /* If the VS doesn't read any inputs (calculating vertex position from > + * a state variable for some reason, for example), emit a single pad > + * VERTEX_ELEMENT struct and bail. > + * > + * The stale VB state stays in place, but they don't do anything unless > + * a VE loads from them. > + */ > + if (nr_elements == 0) { > + dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), 1 + > GENX(VERTEX_ELEMENT_STATE_length)); Wrap overflowing "1 + GENX(..." to next line. > + struct GENX(VERTEX_ELEMENT_STATE) elem = { > + .Valid = true, > + .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT, > + .Component0Control = VFCOMP_STORE_0, > + .Component1Control = VFCOMP_STORE_0, > + .Component2Control = VFCOMP_STORE_0, > + .Component3Control = VFCOMP_STORE_1_FP, > + }; > + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem); > + return; > + } > + > + /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. 
*/ > + const bool uses_draw_params = > + vs_prog_data->uses_basevertex || > + vs_prog_data->uses_baseinstance; > + const unsigned nr_buffers = brw->vb.nr_buffers + > + uses_draw_params + vs_prog_data->uses_drawid; > + > + if (nr_buffers) { > +#if GEN_GEN >= 6 > + assert(nr_buffers <= 33); > +#else > + assert(nr_buffers <= 17); > +#endif Either drop these five lines or the one below :) > + assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17)); > + > + dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS), > + 1 + GENX(VERTEX_BUFFER_STATE_length) * > nr_buffers); > + > + for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { > + const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; > + /* Prior to Haswell and Bay Trail we have to use 4-component formats > + * to fake 3-component ones. In particular, we do this for > + * half-float and 8 and 16-bit integer formats. This means that the > + * vertex element may poke over the end of the buffer by 2 bytes. > + */ > + unsigned padding = > + (GEN_GEN <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2; Could be const. And if we added: const unsigned end = buffer->offset + buffer->size + padding; > + dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo, > + buffer->offset, > + buffer->offset + buffer->size + > padding, we could use it here and avoid overflowing the line. 
> + buffer->stride, > + buffer->step_rate); > + } > + > + if (uses_draw_params) { > + dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers, > + brw->draw.draw_params_bo, > + brw->draw.draw_params_offset, > + brw->draw.draw_params_bo->size, > + 0 /* stride */, > + 0 /* step rate */); > + } > + > + if (vs_prog_data->uses_drawid) { > + dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1, > + brw->draw.draw_id_bo, > + brw->draw.draw_id_offset, > + brw->draw.draw_id_bo->size, > + 0 /* stride */, > + 0 /* step rate */); > + } > + } > + > + /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, > + * presumably for VertexID/InstanceID. > + */ > +#if GEN_GEN >= 6 > + assert(nr_elements <= 34); > + struct brw_vertex_element *gen6_edgeflag_input = NULL; Could be const, contents is only used for reading. > +#else > + assert(nr_elements <= 18); > +#endif > + > + dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), > + 1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements); > + unsigned i; > + for (i = 0; i < brw->vb.nr_enabled; i++) { > + struct brw_vertex_element *input = brw->vb.enabled[i]; Could be const. > + uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); > + uint32_t comp0 = VFCOMP_STORE_SRC; > + uint32_t comp1 = VFCOMP_STORE_SRC; > + uint32_t comp2 = VFCOMP_STORE_SRC; > + uint32_t comp3 = VFCOMP_STORE_SRC; > + unsigned num_uploads = 1; Would this be a little simpler (dropping the update below): const unsigned num_uploads = GEN_GEN < 8 ? genX(uploads_needed(format)) : 1; > + > +#if GEN_GEN >= 8 > + /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): > + * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an > + * element which has edge flag enabled." > + */ > + assert(!(is_passthru_format(format) && uses_edge_flag)); > +#endif > + > + /* The gen4 driver expects edgeflag to come in as a float, and passes > + * that float on to the tests in the clipper. 
Mesa's current vertex > + * attribute value for EdgeFlag is stored as a float, which works out. > + * glEdgeFlagPointer, on the other hand, gives us an unnormalized > + * integer ubyte. Just rewrite that to convert to a float. > + * > + * Gen6+ passes edgeflag as sideband along with the vertex, instead > + * of in the VUE. We have to upload it sideband as the last vertex > + * element according to the B-Spec. > + */ > +#if GEN_GEN >= 6 > + if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { > + gen6_edgeflag_input = input; > + continue; > + } > +#endif > + > +#if GEN_GEN < 8 > + num_uploads = genX(uploads_needed(format)); > +#endif > + > + for (unsigned c = 0; c < num_uploads; c++) { > + uint32_t upload_format = GEN_GEN >= 8 ? format : Could be const. > + downsize_format_if_needed(format, c); > + /* If we need more that one upload, the offset stride would be 128 > + * bits (16 bytes), as for previous uploads we are using the full > + * entry. */ > + unsigned int offset = input->offset + c * 16; Could be const and simply "unsigned". > + int size = input->glarray->Size; > + > + if (GEN_GEN < 8 && is_passthru_format(format)) > + size = upload_format_size(upload_format); Matter of taste but could be also: const int size = (GEN_GEN < 8 && is_passthru_format(format)) ? upload_format_size(upload_format) : input->glarray->Size; > + > + switch (size) { > + case 0: comp0 = VFCOMP_STORE_0; > + case 1: comp1 = VFCOMP_STORE_0; > + case 2: comp2 = VFCOMP_STORE_0; > + case 3: > + if (GEN_GEN >= 8 && input->glarray->Doubles) { > + comp3 = VFCOMP_STORE_0; > + } else if (input->glarray->Integer) { > + comp3 = VFCOMP_STORE_1_INT; > + } else { > + comp3 = VFCOMP_STORE_1_FP; > + } > + > + break; > + } > + > +#if GEN_GEN >= 8 > + /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): > + * > + * "When SourceElementFormat is set to one of the *64*_PASSTHRU > + * formats, 64-bit components are stored in the URB without any > + * conversion. 
In this case, vertex elements must be written as > 128 > + * or 256 bits, with VFCOMP_STORE_0 being used to pad the > output as > + * required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red > + * component into the URB, Component 1 must be specified as > + * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) in > + * order to output a 128-bit vertex element, or Components 1-3 > must > + * be specified as VFCOMP_STORE_0 in order to output a 256-bit > vertex > + * element. Likewise, use of R64G64B64_PASSTHRU requires > Component 3 > + * to be specified as VFCOMP_STORE_0 in order to output a > 256-bit > + * vertex element." > + */ > + if (input->glarray->Doubles && !input->is_dual_slot) { > + /* Store vertex elements which correspond to double and dvec2 > vertex > + * shader inputs as 128-bit vertex elements, instead of 256-bits. > + */ > + comp2 = VFCOMP_NOSTORE; > + comp3 = VFCOMP_NOSTORE; > + } > +#endif > + > + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { > + .VertexBufferIndex = input->buffer, > + .Valid = true, > + .SourceElementFormat = upload_format, > + .SourceElementOffset = offset, > + .Component0Control = comp0, > + .Component1Control = comp1, > + .Component2Control = comp2, > + .Component3Control = comp3, > +#if GEN_GEN < 5 > + .DestinationElementOffset = i * 4, > +#endif > + }; > + > + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); > + dw += GENX(VERTEX_ELEMENT_STATE_length); > + } > + } > + > + if (needs_sgvs_element) { > + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { > + .Valid = true, > + .Component0Control = VFCOMP_STORE_0, > + .Component1Control = VFCOMP_STORE_0, > + .Component2Control = VFCOMP_STORE_0, > + .Component3Control = VFCOMP_STORE_0, > +#if GEN_GEN < 5 > + .DestinationElementOffset = i * 4, This is how the original had it as well. I'm just wondering whether we should instead use: .DestinationElementOffset = brw->vb.nr_buffers * 4, At this point i == brw->vb.nr_buffers always holds, right?
> +#endif > + }; > + > +#if GEN_GEN >= 8 > + if (vs_prog_data->uses_basevertex || > + vs_prog_data->uses_baseinstance) { > + elem_state.VertexBufferIndex = brw->vb.nr_buffers; > + elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT; > + elem_state.Component0Control = VFCOMP_STORE_SRC; > + elem_state.Component1Control = VFCOMP_STORE_SRC; > + } > +#else > + elem_state.VertexBufferIndex = brw->vb.nr_buffers; > + elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT; > + if (vs_prog_data->uses_basevertex) > + elem_state.Component0Control = VFCOMP_STORE_SRC; > + > + if (vs_prog_data->uses_baseinstance) > + elem_state.Component1Control = VFCOMP_STORE_SRC; > + > + if (vs_prog_data->uses_vertexid) > + elem_state.Component2Control = VFCOMP_STORE_VID; > + > + if (vs_prog_data->uses_instanceid) > + elem_state.Component3Control = VFCOMP_STORE_IID; > +#endif > + > + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); > + dw += GENX(VERTEX_ELEMENT_STATE_length); > + } > + > + if (vs_prog_data->uses_drawid) { > + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { > + .Valid = true, > + .VertexBufferIndex = brw->vb.nr_buffers + 1, > + .SourceElementFormat = ISL_FORMAT_R32_UINT, > + .Component0Control = VFCOMP_STORE_SRC, > + .Component1Control = VFCOMP_STORE_0, > + .Component2Control = VFCOMP_STORE_0, > + .Component3Control = VFCOMP_STORE_0, > +#if GEN_GEN < 5 > + .DestinationElementOffset = i * 4, Same comment as further up. > +#endif > + }; > + > + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); > + dw += GENX(VERTEX_ELEMENT_STATE_length); > + } > + > +#if GEN_GEN >= 6 > + if (gen6_edgeflag_input) { > + uint32_t format = Could be const. 
> + brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); > + > + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { > + .Valid = true, > + .VertexBufferIndex = gen6_edgeflag_input->buffer, > + .EdgeFlagEnable = true, > + .SourceElementFormat = format, > + .SourceElementOffset = gen6_edgeflag_input->offset, > + .Component0Control = VFCOMP_STORE_SRC, > + .Component1Control = VFCOMP_STORE_0, > + .Component2Control = VFCOMP_STORE_0, > + .Component3Control = VFCOMP_STORE_0, > + }; > + > + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); > + dw += GENX(VERTEX_ELEMENT_STATE_length); > + } > +#endif > + > +#if GEN_GEN >= 8 > + for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { > + const struct brw_vertex_element *input = brw->vb.enabled[i]; > + const struct brw_vertex_buffer *buffer = > &brw->vb.buffers[input->buffer]; > + unsigned element_index; > + > + /* The edge flag element is reordered to be the last one in the code > + * above so we need to compensate for that in the element indices used > + * below. 
> + */ > + if (input == gen6_edgeflag_input) > + element_index = nr_elements - 1; > + else > + element_index = j++; > + > + brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { > + vfi.VertexElementIndex = element_index; > + vfi.InstancingEnable = buffer->step_rate != 0; > + vfi.InstanceDataStepRate = buffer->step_rate; > + } > + } > + > + if (vs_prog_data->uses_drawid) { > + const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; > + > + brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { > + vfi.VertexElementIndex = element; > + } > + } > +#endif > +} > + > +static const struct brw_tracked_state genX(vertices) = { > + .dirty = { > + .mesa = _NEW_POLYGON, > + .brw = BRW_NEW_BATCH | > + BRW_NEW_BLORP | > + BRW_NEW_VERTICES | > + BRW_NEW_VS_PROG_DATA, > + }, > + .emit = genX(emit_vertices), > +}; > + > #if GEN_GEN >= 6 > /** > * Determine the appropriate attribute override value to store into the > @@ -3004,7 +3556,7 @@ genX(init_atoms)(struct brw_context *brw) > &brw_drawing_rect, > &brw_indices, /* must come before brw_vertices */ > &brw_index_buffer, > - &brw_vertices, > + &genX(vertices), > > &brw_constant_buffer > }; > @@ -3071,7 +3623,7 @@ genX(init_atoms)(struct brw_context *brw) > > &brw_indices, /* must come before brw_vertices */ > &brw_index_buffer, > - &brw_vertices, > + &genX(vertices), > }; > #elif GEN_GEN == 7 > static const struct brw_tracked_state *render_atoms[] = > @@ -3159,7 +3711,7 @@ genX(init_atoms)(struct brw_context *brw) > > &brw_indices, /* must come before brw_vertices */ > &brw_index_buffer, > - &brw_vertices, > + &genX(vertices), > > &haswell_cut_index, > }; > @@ -3252,7 +3804,7 @@ genX(init_atoms)(struct brw_context *brw) > > &brw_indices, > &gen8_index_buffer, > - &gen8_vertices, > + &genX(vertices), > > &haswell_cut_index, > &gen8_pma_fix, > -- > git-series 0.9.1 > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev