On 12/15/2015 12:28 AM, Kristian Høgsberg Kristensen wrote: > We have to break open a new vec4 for gl_DrawIDARB. We've used up all > space in the vec4 we use for SGVS and gl_DrawIDARB has to come from its > own separate vertex buffer anyway. This is because we point the vb for > base vertex and base instance into the draw parameter BO for indirect > draw calls, but the draw id is generated by mesa in a different buffer. > --- > src/mesa/drivers/dri/i965/brw_compiler.h | 1 + > src/mesa/drivers/dri/i965/brw_context.h | 9 +++++ > src/mesa/drivers/dri/i965/brw_draw.c | 8 ++-- > src/mesa/drivers/dri/i965/brw_draw_upload.c | 45 > ++++++++++++++++++++++- > src/mesa/drivers/dri/i965/brw_fs.cpp | 2 + > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 10 ++++- > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 +++++ > src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 +++- > src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 ++++- > src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 5 +++ > src/mesa/drivers/dri/i965/gen8_draw_upload.c | 34 ++++++++++++++++- > src/mesa/drivers/dri/i965/intel_extensions.c | 1 + > 12 files changed, 132 insertions(+), 11 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > b/src/mesa/drivers/dri/i965/brw_compiler.h > index 58ee966..2333f4a 100644 > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > @@ -549,6 +549,7 @@ struct brw_vs_prog_data { > bool uses_instanceid; > bool uses_basevertex; > bool uses_baseinstance; > + bool uses_drawid; > }; > > struct brw_tcs_prog_data > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 1378402..97ebf06 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -919,6 +919,15 @@ struct brw_context > */ > drm_intel_bo *draw_params_bo; > uint32_t draw_params_offset; > + > + /** > + * The value of gl_DrawID for the current _mesa_prim. This always comes > + * in from it's own vertex buffer since it's not part of the indirect > + * draw parameters. > + */ > + int gl_drawid; > + drm_intel_bo *draw_id_bo; > + uint32_t draw_id_offset; > } draw; > > struct { > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > b/src/mesa/drivers/dri/i965/brw_draw.c > index 298ac06..b0710c67 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw.c > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > @@ -513,11 +513,9 @@ brw_try_draw_prims(struct gl_context *ctx, > > /* gl_DrawID always needs its own vertex buffer since it's not part of > * the indirect parameter buffer. */
The */ goes on its own line. > - if (brw->vs.prog_data->uses_drawid) { > - brw->draw.gl_drawid = prims[i].drawid; > - drm_intel_bo_unreference(brw->draw.draw_id_bo); > - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; > - } > + brw->draw.gl_drawid = prims[i].draw_id; > + drm_intel_bo_unreference(brw->draw.draw_id_bo); > + brw->ctx.NewDriverState |= BRW_NEW_VERTICES; The previous patch (incorrectly) added this block, and it seems like this should be conditional on uses_drawid. > > if (brw->gen < 6) > brw_set_prim(brw, &prims[i]); > diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c > b/src/mesa/drivers/dri/i965/brw_draw_upload.c > index ccf963c..e601190 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c > +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c > @@ -599,6 +599,12 @@ brw_prepare_shader_draw_parameters(struct brw_context > *brw) > &brw->draw.draw_params_bo, > &brw->draw.draw_params_offset); > } > + > + if (brw->vs.prog_data->uses_drawid) { > + intel_upload_data(brw, &brw->draw.gl_drawid, > sizeof(brw->draw.gl_drawid), 4, > + &brw->draw.draw_id_bo, > + &brw->draw.draw_id_offset); > + } > } > > /** > @@ -663,6 +669,8 @@ brw_emit_vertices(struct brw_context *brw) > if (brw->vs.prog_data->uses_vertexid || > brw->vs.prog_data->uses_instanceid || > brw->vs.prog_data->uses_basevertex || > brw->vs.prog_data->uses_baseinstance) > ++nr_elements; > + if (brw->vs.prog_data->uses_drawid) > + nr_elements++; > > /* If the VS doesn't read any inputs (calculating vertex position from > * a state variable for some reason, for example), emit a single pad > @@ -699,7 +707,8 @@ brw_emit_vertices(struct brw_context *brw) > const bool uses_draw_params = > brw->vs.prog_data->uses_basevertex || > brw->vs.prog_data->uses_baseinstance; > - const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params; > + const unsigned nr_buffers = brw->vb.nr_buffers + > + uses_draw_params + brw->vs.prog_data->uses_drawid; > > if (nr_buffers) { > if (brw->gen >= 6) { > @@ -726,6 +735,16 @@ brw_emit_vertices(struct brw_context *brw) > 0, /* stride */ > 0); /* step rate */ > } > + > + if (brw->vs.prog_data->uses_drawid) { > + EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, > + brw->draw.draw_id_bo, > + brw->draw.draw_id_bo->size - 1, > + brw->draw.draw_id_offset, > + 0, /* stride */ > + 0); /* step rate */ > + } > + > ADVANCE_BATCH(); > } > > @@ -839,6 +858,30 @@ brw_emit_vertices(struct brw_context *brw) > OUT_BATCH(dw1); > } > > + if (brw->vs.prog_data->uses_drawid) { > + uint32_t dw0 = 0, dw1 = 0; > + > + dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); > + > + if (brw->gen >= 6) { > + dw0 |= GEN6_VE0_VALID | > + ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | > + (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); > + } else { > + dw0 |= BRW_VE0_VALID | > + ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) | > + (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); If we restrict this extension to core profile (i.e., only when ARB_draw_indirect is available), we can delete a bunch of this. We only do ARB_draw_indirect on GEN7+. > + > + dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; Mixed tabs and spaces. > + } > + > + OUT_BATCH(dw0); > + OUT_BATCH(dw1); > + } > + > if (brw->gen >= 6 && gen6_edgeflag_input) { > uint32_t format = > brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 918bc0f..359e138 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -1674,6 +1674,8 @@ fs_visitor::assign_vs_urb_setup() > if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || > vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) > count++; > + if (vs_prog_data->uses_drawid) > + count++; > > /* Each attribute is 4 regs. */ > this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 2e4937e..9e9c700 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -228,6 +228,13 @@ emit_system_values_block(nir_block *block, void > *void_visitor) > *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE); > break; > > + case nir_intrinsic_load_draw_id: > + assert(v->stage == MESA_SHADER_VERTEX); > + reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID]; > + if (reg->file == BAD_FILE) > + *reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID); > + break; > + > case nir_intrinsic_load_invocation_id: > assert(v->stage == MESA_SHADER_GEOMETRY); > reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID]; > @@ -1739,7 +1746,8 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, > case nir_intrinsic_load_vertex_id_zero_base: > case nir_intrinsic_load_base_vertex: > case nir_intrinsic_load_instance_id: > - case nir_intrinsic_load_base_instance: { > + case nir_intrinsic_load_base_instance: > + case nir_intrinsic_load_draw_id: { > gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); > fs_reg val = nir_system_values[sv]; > assert(val.file != BAD_FILE); > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index ae98de1..21492ac 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -59,6 +59,16 @@ fs_visitor::emit_vs_system_value(int location) > reg->reg_offset = 3; > vs_prog_data->uses_instanceid = true; > break; > + case SYSTEM_VALUE_DRAW_ID: > + if (nir->info.system_values_read & > + (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) | > + BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) | > + BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) | > + BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) > + reg->nr += 4; > + reg->reg_offset = 0; > + vs_prog_data->uses_drawid = true; > + break; > default: > unreachable("not reached"); > } > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index 71c0f9f..34f37a3 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -1555,7 +1555,7 @@ int > vec4_vs_visitor::setup_attributes(int payload_reg) > { > int nr_attributes; > - int attribute_map[VERT_ATTRIB_MAX + 1]; > + int attribute_map[VERT_ATTRIB_MAX + 2]; > memset(attribute_map, 0, sizeof(attribute_map)); > > nr_attributes = 0; > @@ -1566,6 +1566,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg) > } > } > > + if (vs_prog_data->uses_drawid) { > + attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes; > + nr_attributes++; > + } > + > /* VertexID is stored by the VF as the last vertex element, but we > * don't represent it with a flag in inputs_read, so we call it > * VERT_ATTRIB_MAX. > @@ -1573,6 +1578,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg) > if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || > vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) { > attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes; > + nr_attributes++; > } > > lower_attributes_to_hw_regs(attribute_map, false /* interleaved */); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > index c6f07d5..e3901b8 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > @@ -85,6 +85,13 @@ > vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) > glsl_type::int_type); > break; > > + case nir_intrinsic_load_draw_id: > + reg = &nir_system_values[SYSTEM_VALUE_DRAW_ID]; > + if (reg->file == BAD_FILE) > + *reg = *make_reg_for_system_value(SYSTEM_VALUE_DRAW_ID, > + glsl_type::int_type); > + break; > + > default: > break; > } > @@ -658,7 +665,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr > *instr) > case nir_intrinsic_load_vertex_id_zero_base: > case nir_intrinsic_load_base_vertex: > case nir_intrinsic_load_instance_id: > - case nir_intrinsic_load_base_instance: { > + case nir_intrinsic_load_base_instance: > + case nir_intrinsic_load_draw_id: { > gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); > src_reg val = src_reg(nir_system_values[sv]); > assert(val.file != BAD_FILE); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp > index bd6a9a4..1d69149 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp > @@ -170,6 +170,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location, > reg->writemask = WRITEMASK_W; > vs_prog_data->uses_instanceid = true; > break; > + case SYSTEM_VALUE_DRAW_ID: > + reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX + 1); > + reg->writemask = WRITEMASK_X; > + vs_prog_data->uses_drawid = true; > + break; > default: > unreachable("not reached"); > } > diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c > b/src/mesa/drivers/dri/i965/gen8_draw_upload.c > index 451cf0b..ff89e5f 100644 > --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c > +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c > @@ -118,7 +118,8 @@ gen8_emit_vertices(struct brw_context *brw) > const bool uses_draw_params = > brw->vs.prog_data->uses_basevertex || > brw->vs.prog_data->uses_baseinstance; > - const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params; > + const unsigned nr_buffers = brw->vb.nr_buffers + > + uses_draw_params + brw->vs.prog_data->uses_drawid; > > if (nr_buffers) { > assert(nr_buffers <= 33); > @@ -147,6 +148,15 @@ gen8_emit_vertices(struct brw_context *brw) > brw->draw.draw_params_offset); > OUT_BATCH(brw->draw.draw_params_bo->size); > } > + > + if (brw->vs.prog_data->uses_drawid) { > + OUT_BATCH((brw->vb.nr_buffers + 1) << GEN6_VB0_INDEX_SHIFT | > + GEN7_VB0_ADDRESS_MODIFYENABLE | > + mocs_wb << 16); > + OUT_RELOC64(brw->draw.draw_id_bo, I915_GEM_DOMAIN_VERTEX, 0, > + brw->draw.draw_id_offset); > + OUT_BATCH(brw->draw.draw_id_bo->size); > + } > ADVANCE_BATCH(); > } > > @@ -163,7 +173,8 @@ gen8_emit_vertices(struct brw_context *brw) > ((brw->vs.prog_data->uses_instanceid || > brw->vs.prog_data->uses_vertexid) && > uses_edge_flag)); > - const unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element; > + const unsigned nr_elements = > + brw->vb.nr_enabled + needs_sgvs_element + > brw->vs.prog_data->uses_drawid; > > /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, > * presumably for VertexID/InstanceID. > @@ -236,6 +247,16 @@ gen8_emit_vertices(struct brw_context *brw) > } > } > > + if (brw->vs.prog_data->uses_drawid) { > + OUT_BATCH(GEN6_VE0_VALID | > + ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | > + (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT)); > + OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); > + } > + > if (gen6_edgeflag_input) { > uint32_t format = > brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); > @@ -273,6 +294,15 @@ gen8_emit_vertices(struct brw_context *brw) > OUT_BATCH(buffer->step_rate); > ADVANCE_BATCH(); > } > + > + if (brw->vs.prog_data->uses_drawid) { > + const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; > + BEGIN_BATCH(3); > + OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); > + OUT_BATCH(element); > + OUT_BATCH(0); > + ADVANCE_BATCH(); > + } > } > > const struct brw_tracked_state gen8_vertices = { > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c > b/src/mesa/drivers/dri/i965/intel_extensions.c > index 24761a7..c95bfbd 100644 > --- a/src/mesa/drivers/dri/i965/intel_extensions.c > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c > @@ -203,6 +203,7 @@ intelInitExtensions(struct gl_context *ctx) > ctx->Extensions.ARB_point_sprite = true; > ctx->Extensions.ARB_seamless_cube_map = true; > ctx->Extensions.ARB_shader_bit_encoding = true; > + ctx->Extensions.ARB_shader_draw_parameters = true; Depending on what we decide about the ARB_draw_indirect dependency, this may need to move. > ctx->Extensions.ARB_shader_texture_lod = true; > ctx->Extensions.ARB_shadow = true; > ctx->Extensions.ARB_sync = true; > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev