On Wed, Dec 16, 2015 at 3:47 PM, Kristian Høgsberg <k...@bitplanet.net> wrote: > From: Kristian Høgsberg Kristensen <k...@bitplanet.net> > > We have to break open a new vec4 for gl_DrawIDARB. We've used up all > space in the vec4 we use for SGVS and gl_DrawIDARB has to come from its > own separate vertex buffer anyway. This is because we point the vb for > base vertex and base instance into the draw parameter BO for indirect > draw calls, but the draw id is generated by mesa in a different buffer. > --- > src/mesa/drivers/dri/i965/brw_compiler.h | 1 + > src/mesa/drivers/dri/i965/brw_context.h | 9 +++++ > src/mesa/drivers/dri/i965/brw_draw.c | 12 ++++++ > src/mesa/drivers/dri/i965/brw_draw_upload.c | 45 > ++++++++++++++++++++++- > src/mesa/drivers/dri/i965/brw_fs.cpp | 2 + > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 10 ++++- > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 +++++ > src/mesa/drivers/dri/i965/brw_vec4.cpp | 13 ++++++- > src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 10 ++++- > src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 5 +++ > src/mesa/drivers/dri/i965/gen8_draw_upload.c | 34 ++++++++++++++++- > src/mesa/drivers/dri/i965/intel_extensions.c | 1 + > 12 files changed, 146 insertions(+), 6 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h > b/src/mesa/drivers/dri/i965/brw_compiler.h > index c678f32..21b2ce5 100644 > --- a/src/mesa/drivers/dri/i965/brw_compiler.h > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h > @@ -565,6 +565,7 @@ struct brw_vs_prog_data { > bool uses_instanceid; > bool uses_basevertex; > bool uses_baseinstance; > + bool uses_drawid; > }; > > struct brw_tcs_prog_data > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 1d7575c..6f5cefb 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -923,6 +923,15 @@ struct brw_context > */ > drm_intel_bo *draw_params_bo; > uint32_t draw_params_offset; > + > + 
/** > + * The value of gl_DrawID for the current _mesa_prim. This always comes > + * in from its own vertex buffer since it's not part of the indirect > + * draw parameters. > + */ > + int gl_drawid; > + drm_intel_bo *draw_id_bo; > + uint32_t draw_id_offset; > } draw; > > struct { > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > b/src/mesa/drivers/dri/i965/brw_draw.c > index e0665d3..b0a162a 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw.c > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > @@ -511,6 +511,18 @@ brw_try_draw_prims(struct gl_context *ctx, > brw->draw.draw_params_offset = 0; > } > > + /* gl_DrawID always needs its own vertex buffer since it's not part of > + * the indirect parameter buffer. If the program uses gl_DrawID we need > + * to flag BRW_NEW_VERTICES. For the first iteration, we don't have > + * valid brw->vs.prog_data, but we always flag BRW_NEW_VERTICES before > + * the loop. > + */ > + brw->draw.gl_drawid = prims[i].draw_id; > + drm_intel_bo_unreference(brw->draw.draw_id_bo); > + brw->draw.draw_id_bo = NULL; > + if (i > 0 && brw->vs.prog_data->uses_drawid) > + brw->ctx.NewDriverState |= BRW_NEW_VERTICES; > + > if (brw->gen < 6) > brw_set_prim(brw, &prims[i]); > else > diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c > b/src/mesa/drivers/dri/i965/brw_draw_upload.c > index ccf963c..f781d8b 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c > +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c > @@ -599,6 +599,12 @@ brw_prepare_shader_draw_parameters(struct brw_context > *brw) > &brw->draw.draw_params_bo, > &brw->draw.draw_params_offset); > } > + > + if (brw->vs.prog_data->uses_drawid) { > + intel_upload_data(brw, &brw->draw.gl_drawid, > sizeof(brw->draw.gl_drawid), 4, > + &brw->draw.draw_id_bo, > + &brw->draw.draw_id_offset); > + } > } > > /** > @@ -663,6 +669,8 @@ brw_emit_vertices(struct brw_context *brw) > if (brw->vs.prog_data->uses_vertexid || > brw->vs.prog_data->uses_instanceid || > brw->vs.prog_data->uses_basevertex 
|| > brw->vs.prog_data->uses_baseinstance) > ++nr_elements; > + if (brw->vs.prog_data->uses_drawid) > + nr_elements++; > > /* If the VS doesn't read any inputs (calculating vertex position from > * a state variable for some reason, for example), emit a single pad > @@ -699,7 +707,8 @@ brw_emit_vertices(struct brw_context *brw) > const bool uses_draw_params = > brw->vs.prog_data->uses_basevertex || > brw->vs.prog_data->uses_baseinstance; > - const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params; > + const unsigned nr_buffers = brw->vb.nr_buffers + > + uses_draw_params + brw->vs.prog_data->uses_drawid; > > if (nr_buffers) { > if (brw->gen >= 6) { > @@ -726,6 +735,16 @@ brw_emit_vertices(struct brw_context *brw) > 0, /* stride */ > 0); /* step rate */ > } > + > + if (brw->vs.prog_data->uses_drawid) { > + EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, > + brw->draw.draw_id_bo, > + brw->draw.draw_id_bo->size - 1, > + brw->draw.draw_id_offset, > + 0, /* stride */ > + 0); /* step rate */ > + } > + > ADVANCE_BATCH(); > } > > @@ -839,6 +858,30 @@ brw_emit_vertices(struct brw_context *brw) > OUT_BATCH(dw1); > } > > + if (brw->vs.prog_data->uses_drawid) { > + uint32_t dw0 = 0, dw1 = 0; > + > + dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); > + > + if (brw->gen >= 6) { > + dw0 |= GEN6_VE0_VALID | > + ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | > + (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); > + } else { > + dw0 |= BRW_VE0_VALID | > + ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) | > + (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT); > + > + dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; > + } > + > + OUT_BATCH(dw0); > + OUT_BATCH(dw1); > + } > + > if (brw->gen >= 6 && gen6_edgeflag_input) { > uint32_t format = > 
brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index b0d7b8c..5671a23 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -1674,6 +1674,8 @@ fs_visitor::assign_vs_urb_setup() > if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || > vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) > count++; > + if (vs_prog_data->uses_drawid) > + count++; > > /* Each attribute is 4 regs. */ > this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index a8445a7..a8f950f 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -228,6 +228,13 @@ emit_system_values_block(nir_block *block, void > *void_visitor) > *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE); > break; > > + case nir_intrinsic_load_draw_id: > + assert(v->stage == MESA_SHADER_VERTEX); > + reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID]; > + if (reg->file == BAD_FILE) > + *reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID); > + break; > + > case nir_intrinsic_load_invocation_id: > assert(v->stage == MESA_SHADER_GEOMETRY); > reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID]; > @@ -1739,7 +1746,8 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, > case nir_intrinsic_load_vertex_id_zero_base: > case nir_intrinsic_load_base_vertex: > case nir_intrinsic_load_instance_id: > - case nir_intrinsic_load_base_instance: { > + case nir_intrinsic_load_base_instance: > + case nir_intrinsic_load_draw_id: { > gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); > fs_reg val = nir_system_values[sv]; > assert(val.file != BAD_FILE); > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
> index e73955a..e1c0823 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -59,6 +59,16 @@ fs_visitor::emit_vs_system_value(int location) > reg->reg_offset = 3; > vs_prog_data->uses_instanceid = true; > break; > + case SYSTEM_VALUE_DRAW_ID: > + if (nir->info.system_values_read & > + (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) | > + BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) | > + BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) | > + BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) > + reg->nr += 4; > + reg->reg_offset = 0; > + vs_prog_data->uses_drawid = true; > + break; > default: > unreachable("not reached"); > } > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index 46f955c..34f37a3 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -1555,7 +1555,7 @@ int > vec4_vs_visitor::setup_attributes(int payload_reg) > { > int nr_attributes; > - int attribute_map[VERT_ATTRIB_MAX + 1]; > + int attribute_map[VERT_ATTRIB_MAX + 2]; > memset(attribute_map, 0, sizeof(attribute_map)); > > nr_attributes = 0; > @@ -1566,6 +1566,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg) > } > } > > + if (vs_prog_data->uses_drawid) { > + attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes; > + nr_attributes++; > + } > + > /* VertexID is stored by the VF as the last vertex element, but we > * don't represent it with a flag in inputs_read, so we call it > * VERT_ATTRIB_MAX. 
> @@ -1573,6 +1578,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg) > if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid || > vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) { > attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes; > + nr_attributes++; > } > > lower_attributes_to_hw_regs(attribute_map, false /* interleaved */); > @@ -1979,6 +1985,11 @@ brw_compile_vs(const struct brw_compiler *compiler, > void *log_data, > nr_attributes++; > } > > + /* gl_DrawID has its very own vec4 */ > + if (shader->info.system_values_read & > BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) { > + nr_attributes++; > + } > + > /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry > * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in > * vec4 mode, the hardware appears to wedge unless we read something. > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > index c6f07d5..e3901b8 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp > @@ -85,6 +85,13 @@ > vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) > glsl_type::int_type); > break; > > + case nir_intrinsic_load_draw_id: > + reg = &nir_system_values[SYSTEM_VALUE_DRAW_ID]; > + if (reg->file == BAD_FILE) > + *reg = *make_reg_for_system_value(SYSTEM_VALUE_DRAW_ID, > + glsl_type::int_type); > + break; > + > default: > break; > } > @@ -658,7 +665,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr > *instr) > case nir_intrinsic_load_vertex_id_zero_base: > case nir_intrinsic_load_base_vertex: > case nir_intrinsic_load_instance_id: > - case nir_intrinsic_load_base_instance: { > + case nir_intrinsic_load_base_instance: > + case nir_intrinsic_load_draw_id: { > gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); > src_reg val = src_reg(nir_system_values[sv]); > assert(val.file != BAD_FILE); > diff --git 
a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp > index bd6a9a4..1d69149 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp > @@ -170,6 +170,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location, > reg->writemask = WRITEMASK_W; > vs_prog_data->uses_instanceid = true; > break; > + case SYSTEM_VALUE_DRAW_ID: > + reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX + 1); > + reg->writemask = WRITEMASK_X; > + vs_prog_data->uses_drawid = true; > + break; > default: > unreachable("not reached"); > } > diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c > b/src/mesa/drivers/dri/i965/gen8_draw_upload.c > index 451cf0b..ff89e5f 100644 > --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c > +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c > @@ -118,7 +118,8 @@ gen8_emit_vertices(struct brw_context *brw) > const bool uses_draw_params = > brw->vs.prog_data->uses_basevertex || > brw->vs.prog_data->uses_baseinstance; > - const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params; > + const unsigned nr_buffers = brw->vb.nr_buffers + > + uses_draw_params + brw->vs.prog_data->uses_drawid; > > if (nr_buffers) { > assert(nr_buffers <= 33); > @@ -147,6 +148,15 @@ gen8_emit_vertices(struct brw_context *brw) > brw->draw.draw_params_offset); > OUT_BATCH(brw->draw.draw_params_bo->size); > } > + > + if (brw->vs.prog_data->uses_drawid) { > + OUT_BATCH((brw->vb.nr_buffers + 1) << GEN6_VB0_INDEX_SHIFT | > + GEN7_VB0_ADDRESS_MODIFYENABLE | > + mocs_wb << 16); > + OUT_RELOC64(brw->draw.draw_id_bo, I915_GEM_DOMAIN_VERTEX, 0, > + brw->draw.draw_id_offset); > + OUT_BATCH(brw->draw.draw_id_bo->size); > + } > ADVANCE_BATCH(); > } > > @@ -163,7 +173,8 @@ gen8_emit_vertices(struct brw_context *brw) > ((brw->vs.prog_data->uses_instanceid || > brw->vs.prog_data->uses_vertexid) && > uses_edge_flag)); > - const unsigned nr_elements = brw->vb.nr_enabled + 
needs_sgvs_element; > + const unsigned nr_elements = > + brw->vb.nr_enabled + needs_sgvs_element + > brw->vs.prog_data->uses_drawid; > > /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, > * presumably for VertexID/InstanceID. > @@ -236,6 +247,16 @@ gen8_emit_vertices(struct brw_context *brw) > } > } > > + if (brw->vs.prog_data->uses_drawid) { > + OUT_BATCH(GEN6_VE0_VALID | > + ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | > + (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT)); > + OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | > + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); > + } > + > if (gen6_edgeflag_input) { > uint32_t format = > brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); > @@ -273,6 +294,15 @@ gen8_emit_vertices(struct brw_context *brw) > OUT_BATCH(buffer->step_rate); > ADVANCE_BATCH(); > } > + > + if (brw->vs.prog_data->uses_drawid) { > + const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; > + BEGIN_BATCH(3); > + OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); > + OUT_BATCH(element); > + OUT_BATCH(0); > + ADVANCE_BATCH(); > + } > } > > const struct brw_tracked_state gen8_vertices = { > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c > b/src/mesa/drivers/dri/i965/intel_extensions.c > index 24761a7..c95bfbd 100644 > --- a/src/mesa/drivers/dri/i965/intel_extensions.c > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c > @@ -203,6 +203,7 @@ intelInitExtensions(struct gl_context *ctx) > ctx->Extensions.ARB_point_sprite = true; > ctx->Extensions.ARB_seamless_cube_map = true; > ctx->Extensions.ARB_shader_bit_encoding = true; > + ctx->Extensions.ARB_shader_draw_parameters = true; > ctx->Extensions.ARB_shader_texture_lod = true; > ctx->Extensions.ARB_shadow = true; > ctx->Extensions.ARB_sync = true; > -- > 2.5.0 > > 
_______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Reviewed-by: Anuj Phogat <anuj.pho...@gmail.com> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev