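brw_draw_prims needs to be refactored prior to the ARB_indirect_parameters implementation. Split brw_try_draw_prims into brw_prepare_draw_prims (state setup done before the primitive loop), brw_try_draw_prim (emission of a single primitive) and brw_end_draw_prims (post-draw flush, program cache check and resolve bookkeeping), and move the per-primitive loop into brw_draw_prims.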
Signed-off-by: Plamena Manolova <plamena.manol...@intel.com> --- src/mesa/drivers/dri/i965/brw_draw.c | 343 +++++++++++++++++++---------------- 1 file changed, 189 insertions(+), 154 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a8ad2ac..7597bae 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -531,7 +531,7 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) if (!irb) continue; - + brw_render_cache_set_add_bo(brw, irb->mt->bo); intel_miptree_finish_render(brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count, @@ -594,21 +594,163 @@ brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw) * fallback conditions. */ static void -brw_try_draw_prims(struct gl_context *ctx, - const struct gl_vertex_array *arrays[], - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - bool index_bounds_valid, - GLuint min_index, - GLuint max_index, - struct brw_transform_feedback_object *xfb_obj, - unsigned stream, - struct gl_buffer_object *indirect) +brw_try_draw_prim(struct gl_context *ctx, + const struct gl_vertex_array *arrays[], + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + bool index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct brw_transform_feedback_object *xfb_obj, + unsigned stream, + struct gl_buffer_object *indirect) { struct brw_context *brw = brw_context(ctx); - GLuint i; bool fail_next = false; + int estimated_max_prim_size; + const int sampler_state_size = 16; + + estimated_max_prim_size = 512; /* batchbuffer commands */ + estimated_max_prim_size += BRW_MAX_TEX_UNIT * + (sampler_state_size + sizeof(struct gen5_sampler_default_color)); + estimated_max_prim_size += 1024; /* gen6 VS push constants */ + estimated_max_prim_size += 1024; /* gen6 WM push constants */ + estimated_max_prim_size += 512; /* misc. pad */ + + /* Flag BRW_NEW_DRAW_CALL on every draw. 
This allows us to have + * atoms that happen on every draw call. + */ + brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL; + + /* Flush the batch if it's approaching full, so that we don't wrap while + * we've got validated state that needs to be in the same batch as the + * primitives. + */ + intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING); + intel_batchbuffer_save_state(brw); + + if (brw->num_instances != prim->num_instances || + brw->basevertex != prim->basevertex || + brw->baseinstance != prim->base_instance) { + brw->num_instances = prim->num_instances; + brw->basevertex = prim->basevertex; + brw->baseinstance = prim->base_instance; + if (prim->draw_id > 0) { /* For draw_id == 0 we just did this before the loop */ + brw->ctx.NewDriverState |= BRW_NEW_VERTICES; + brw_merge_inputs(brw, arrays); + } + } + + /* Determine if we need to flag BRW_NEW_VERTICES for updating the + * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we + * always flag if the shader uses one of the values. For direct draws, + * we only flag if the values change. + */ + const int new_basevertex = + prim->indexed ? prim->basevertex : prim->start; + const int new_baseinstance = prim->base_instance; + const struct brw_vs_prog_data *vs_prog_data = + brw_vs_prog_data(brw->vs.base.prog_data); + if (prim->draw_id > 0) { + const bool uses_draw_parameters = + vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; + + if ((uses_draw_parameters && prim->is_indirect) || + (vs_prog_data->uses_basevertex && + brw->draw.params.gl_basevertex != new_basevertex) || + (vs_prog_data->uses_baseinstance && + brw->draw.params.gl_baseinstance != new_baseinstance)) + brw->ctx.NewDriverState |= BRW_NEW_VERTICES; + } + + brw->draw.params.gl_basevertex = new_basevertex; + brw->draw.params.gl_baseinstance = new_baseinstance; + brw_bo_unreference(brw->draw.draw_params_bo); + + if (prim->is_indirect) { + /* Point draw_params_bo at the indirect buffer. 
*/ + brw->draw.draw_params_bo = + intel_buffer_object(ctx->DrawIndirectBuffer)->buffer; + brw_bo_reference(brw->draw.draw_params_bo); + brw->draw.draw_params_offset = + prim->indirect_offset + (prim->indexed ? 12 : 8); + } else { + /* Set draw_params_bo to NULL so brw_prepare_vertices knows it + * has to upload gl_BaseVertex and such if they're needed. + */ + brw->draw.draw_params_bo = NULL; + brw->draw.draw_params_offset = 0; + } + + /* gl_DrawID always needs its own vertex buffer since it's not part of + * the indirect parameter buffer. If the program uses gl_DrawID we need + * to flag BRW_NEW_VERTICES. For the first iteration, we don't have + * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before + * the loop. + */ + brw->draw.gl_drawid = prim->draw_id; + brw_bo_unreference(brw->draw.draw_id_bo); + brw->draw.draw_id_bo = NULL; + if (prim->draw_id > 0 && vs_prog_data->uses_drawid) + brw->ctx.NewDriverState |= BRW_NEW_VERTICES; + + if (brw->gen < 6) + brw_set_prim(brw, prim); + else + gen6_set_prim(brw, prim); + +retry: + + /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and + * that the state updated in the loop outside of this block is that in + * *_set_prim or intel_batchbuffer_flush(), which only impacts + * brw->ctx.NewDriverState. + */ + if (brw->ctx.NewDriverState) { + brw->no_batch_wrap = true; + brw_upload_render_state(brw); + } + + brw_emit_prim(brw, prim, brw->primitive, xfb_obj, stream); + + brw->no_batch_wrap = false; + + if (!brw_batch_has_aperture_space(brw, 0)) { + if (!fail_next) { + intel_batchbuffer_reset_to_saved(brw); + intel_batchbuffer_flush(brw); + fail_next = true; + goto retry; + } else { + int ret = intel_batchbuffer_flush(brw); + WARN_ONCE(ret == -ENOSPC, + "i965: Single primitive emit exceeded " + "available aperture space\n"); + } + } + + /* Now that we know we haven't run out of aperture space, we can safely + * reset the dirty bits. 
+ */ + if (brw->ctx.NewDriverState) + brw_render_state_finished(brw); +} + +static void +brw_prepare_draw_prims(struct gl_context *ctx, + const struct gl_vertex_array *arrays[], + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + bool index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct brw_transform_feedback_object *xfb_obj, + unsigned stream, + struct gl_buffer_object *indirect) +{ + struct brw_context *brw = brw_context(ctx); if (ctx->NewState) _mesa_update_state(ctx); @@ -655,145 +797,22 @@ brw_try_draw_prims(struct gl_context *ctx, /* Bind all inputs, derive varying and size information: */ brw_merge_inputs(brw, arrays); +} - brw->ib.ib = ib; - brw->ctx.NewDriverState |= BRW_NEW_INDICES; - - brw->vb.index_bounds_valid = index_bounds_valid; - brw->vb.min_index = min_index; - brw->vb.max_index = max_index; - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - - for (i = 0; i < nr_prims; i++) { - int estimated_max_prim_size; - const int sampler_state_size = 16; - - estimated_max_prim_size = 512; /* batchbuffer commands */ - estimated_max_prim_size += BRW_MAX_TEX_UNIT * - (sampler_state_size + sizeof(struct gen5_sampler_default_color)); - estimated_max_prim_size += 1024; /* gen6 VS push constants */ - estimated_max_prim_size += 1024; /* gen6 WM push constants */ - estimated_max_prim_size += 512; /* misc. pad */ - - /* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have - * atoms that happen on every draw call. - */ - brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL; - - /* Flush the batch if it's approaching full, so that we don't wrap while - * we've got validated state that needs to be in the same batch as the - * primitives. 
- */ - intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING); - intel_batchbuffer_save_state(brw); - - if (brw->num_instances != prims[i].num_instances || - brw->basevertex != prims[i].basevertex || - brw->baseinstance != prims[i].base_instance) { - brw->num_instances = prims[i].num_instances; - brw->basevertex = prims[i].basevertex; - brw->baseinstance = prims[i].base_instance; - if (i > 0) { /* For i == 0 we just did this before the loop */ - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - brw_merge_inputs(brw, arrays); - } - } - - /* Determine if we need to flag BRW_NEW_VERTICES for updating the - * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we - * always flag if the shader uses one of the values. For direct draws, - * we only flag if the values change. - */ - const int new_basevertex = - prims[i].indexed ? prims[i].basevertex : prims[i].start; - const int new_baseinstance = prims[i].base_instance; - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - if (i > 0) { - const bool uses_draw_parameters = - vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance; - - if ((uses_draw_parameters && prims[i].is_indirect) || - (vs_prog_data->uses_basevertex && - brw->draw.params.gl_basevertex != new_basevertex) || - (vs_prog_data->uses_baseinstance && - brw->draw.params.gl_baseinstance != new_baseinstance)) - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - } - - brw->draw.params.gl_basevertex = new_basevertex; - brw->draw.params.gl_baseinstance = new_baseinstance; - brw_bo_unreference(brw->draw.draw_params_bo); - - if (prims[i].is_indirect) { - /* Point draw_params_bo at the indirect buffer. */ - brw->draw.draw_params_bo = - intel_buffer_object(ctx->DrawIndirectBuffer)->buffer; - brw_bo_reference(brw->draw.draw_params_bo); - brw->draw.draw_params_offset = - prims[i].indirect_offset + (prims[i].indexed ? 
12 : 8); - } else { - /* Set draw_params_bo to NULL so brw_prepare_vertices knows it - * has to upload gl_BaseVertex and such if they're needed. - */ - brw->draw.draw_params_bo = NULL; - brw->draw.draw_params_offset = 0; - } - - /* gl_DrawID always needs its own vertex buffer since it's not part of - * the indirect parameter buffer. If the program uses gl_DrawID we need - * to flag BRW_NEW_VERTICES. For the first iteration, we don't have - * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before - * the loop. - */ - brw->draw.gl_drawid = prims[i].draw_id; - brw_bo_unreference(brw->draw.draw_id_bo); - brw->draw.draw_id_bo = NULL; - if (i > 0 && vs_prog_data->uses_drawid) - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - - if (brw->gen < 6) - brw_set_prim(brw, &prims[i]); - else - gen6_set_prim(brw, &prims[i]); - -retry: - - /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and - * that the state updated in the loop outside of this block is that in - * *_set_prim or intel_batchbuffer_flush(), which only impacts - * brw->ctx.NewDriverState. - */ - if (brw->ctx.NewDriverState) { - brw->no_batch_wrap = true; - brw_upload_render_state(brw); - } - - brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream); - - brw->no_batch_wrap = false; - - if (!brw_batch_has_aperture_space(brw, 0)) { - if (!fail_next) { - intel_batchbuffer_reset_to_saved(brw); - intel_batchbuffer_flush(brw); - fail_next = true; - goto retry; - } else { - int ret = intel_batchbuffer_flush(brw); - WARN_ONCE(ret == -ENOSPC, - "i965: Single primitive emit exceeded " - "available aperture space\n"); - } - } - - /* Now that we know we haven't run out of aperture space, we can safely - * reset the dirty bits. 
- */ - if (brw->ctx.NewDriverState) - brw_render_state_finished(brw); - } +static void +brw_end_draw_prims(struct gl_context *ctx, + const struct gl_vertex_array *arrays[], + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + bool index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct brw_transform_feedback_object *xfb_obj, + unsigned stream, + struct gl_buffer_object *indirect) +{ + struct brw_context *brw = brw_context(ctx); if (brw->always_flush_batch) intel_batchbuffer_flush(brw); @@ -801,8 +820,6 @@ retry: brw_program_cache_check_size(brw); brw_postdraw_reconcile_align_wa_slices(brw); brw_postdraw_set_buffers_need_resolve(brw); - - return; } void @@ -819,6 +836,7 @@ brw_draw_prims(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); const struct gl_vertex_array **arrays = ctx->Array._DrawArrays; + int i; struct brw_transform_feedback_object *xfb_obj = (struct brw_transform_feedback_object *) gl_xfb_obj; @@ -855,11 +873,28 @@ brw_draw_prims(struct gl_context *ctx, index_bounds_valid = true; } + if (prims[0].draw_id == 0) + brw_prepare_draw_prims(ctx, arrays, prims, nr_prims, ib, + index_bounds_valid, min_index, max_index, xfb_obj, + stream, indirect); + + brw->ib.ib = ib; + brw->ctx.NewDriverState |= BRW_NEW_INDICES; + + brw->vb.index_bounds_valid = index_bounds_valid; + brw->vb.min_index = min_index; + brw->vb.max_index = max_index; + brw->ctx.NewDriverState |= BRW_NEW_VERTICES; /* Try drawing with the hardware, but don't do anything else if we can't * manage it. swrast doesn't support our featureset, so we can't fall back * to it. 
*/ - brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, index_bounds_valid, + + for (i = 0; i < nr_prims; i++) { + brw_try_draw_prim(ctx, arrays, &prims[i], ib, index_bounds_valid, + min_index, max_index, xfb_obj, stream, indirect); + } + brw_end_draw_prims(ctx, arrays, prims, nr_prims, ib, index_bounds_valid, min_index, max_index, xfb_obj, stream, indirect); } -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev