On 05/20/2015 10:21 AM, Pohjolainen, Topi wrote: > On Wed, May 20, 2015 at 10:11:36AM +0300, Abdiel Janulgue wrote: >> >> >> On 05/20/2015 09:54 AM, Pohjolainen, Topi wrote: >>> On Wed, May 20, 2015 at 09:32:08AM +0300, Abdiel Janulgue wrote: >>>> This patch implements the binding table enable command which is also >>>> used to allocate a binding table pool where hardware-generated >>>> binding table entries are flushed into. Each binding table offset in >>>> the binding table pool is unique per each shader stage that are >>>> enabled within a batch. >>>> >>>> Also insert the required brw_tracked_state objects to enable >>>> hw-generated binding tables in normal render path. >>>> >>>> Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com> >>>> --- >>>> src/mesa/drivers/dri/i965/brw_binding_tables.c | 97 >>>> ++++++++++++++++++++++++++ >>>> src/mesa/drivers/dri/i965/brw_context.c | 4 ++ >>>> src/mesa/drivers/dri/i965/brw_context.h | 6 ++ >>>> src/mesa/drivers/dri/i965/brw_state.h | 7 ++ >>>> src/mesa/drivers/dri/i965/brw_state_upload.c | 2 + >>>> src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++ >>>> 6 files changed, 120 insertions(+) >>>> >>>> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c >>>> b/src/mesa/drivers/dri/i965/brw_binding_tables.c >>>> index 98ff0dd..d8cb96d 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c >>>> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c >>>> @@ -45,6 +45,23 @@ >>>> #include "intel_batchbuffer.h" >>>> >>>> /** >>>> + * We are required to start at this offset for binding table pointer >>>> state when >>>> + * HW-generated binding table is enabled otherwise the GPU will hang. >>>> Note that >>>> + * the binding table offsets are now relative to the binding table pool >>>> base >>>> + * address instead of from the state batch. 
>>>> + * >>>> + * From the Bspec 3DSTATE_BINDING_TABLE_POINTERS_{PS/VS/GS/DS/HS} > >>>> Pointer to >>>> + * PS Binding Table section lists the format as: >>>> + * >>>> + * "SurfaceStateOffset[16:6]BINDING_TABLE_STATE*256 When >>>> + * HW-generated binding table is enabled" >>>> + * >>>> + * When HW-generated binding tables are enabled, Surface State Offsets are >>>> + * 16-bit entries. >>>> + */ >>>> +#define HW_BT_START_OFFSET 2 * 256; > > There is also an extra ; at the end. > >>> >>> Just checking that I'm reading this right, the multiplier two here is based >>> on your experiments and it is not found in the spec? >> >> It's in the spec. 2 is "BINDING_TABLE_STATE" which is 16 bits. > > Ah, okay, now I get the last two lines of the documentation. I would have > probably written it the other way around then, 256 * 2 (num_elems * elem_size), > or even as 256 * sizeof(uint16_t).
256 * sizeof(uint16_t) looks good! I'll do that in the next version. Thanks, > >> >>> >>>> + >>>> +/** >>>> * Upload a shader stage's binding table as indirect state. >>>> * >>>> * This copies brw_stage_state::surf_offset[] into the indirect state >>>> section >>>> @@ -170,6 +187,86 @@ const struct brw_tracked_state brw_gs_binding_table = >>>> { >>>> .emit = brw_gs_upload_binding_table, >>>> }; >>>> >>>> +/** >>>> + * Hardware-generated binding tables for the resource streamer >>>> + */ >>>> +void >>>> +gen7_disable_hw_binding_tables(struct brw_context *brw) >>>> +{ >>>> + BEGIN_BATCH(3); >>>> + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2)); >>>> + OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, >>>> BRW_HW_BINDING_TABLE_ENABLE) | >>>> + brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0); >>>> + OUT_BATCH(0); >>>> + ADVANCE_BATCH(); >>>> + >>>> + /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding >>>> + * Tables > Programming note >>>> + >>>> + * "When switching between HW and SW binding table generation, SW must >>>> + * issue a state cache invalidate." 
>>>> + */ >>>> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); >>>> +} >>>> + >>>> +void >>>> +gen7_enable_hw_binding_tables(struct brw_context *brw) >>>> +{ >>>> + if (!brw->has_resource_streamer) { >>>> + gen7_disable_hw_binding_tables(brw); >>>> + return; >>>> + } >>>> + >>>> + if (!brw->hw_bt_pool.bo) { >>>> + /* We use a single re-usable buffer object for the lifetime of the >>>> + * context and size it to maximum allowed binding tables that can be >>>> + * programmed per batch: >>>> + * >>>> + * BSpec, 3D Pipeline > Resource Streamer > Hardware Binding Tables: >>>> + * "A maximum of 16,383 Binding tables are allowed in any batch >>>> buffer" >>>> + */ >>>> + static const int max_size = 16383 * 4; >>>> + brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt", >>>> + max_size, 64); >>>> + brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET; >>>> + } >>>> + >>>> + uint32_t dw1 = SET_FIELD(BRW_HW_BINDING_TABLE_ON, >>>> + BRW_HW_BINDING_TABLE_ENABLE); >>>> + if (brw->is_haswell) >>>> + dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_MOCS) | >>>> + HSW_HW_BINDING_TABLE_RESERVED; >>>> + >>>> + BEGIN_BATCH(3); >>>> + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2)); >>>> + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1); >>>> + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, >>>> + brw->hw_bt_pool.bo->size); >>>> + ADVANCE_BATCH(); >>>> + >>>> + /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding >>>> + * Tables > Programming note >>>> + >>>> + * "When switching between HW and SW binding table generation, SW must >>>> + * issue a state cache invalidate." 
>>>> + */ >>>> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); >>>> +} >>>> + >>>> +void >>>> +gen7_reset_rs_pool_offsets(struct brw_context *brw) >>>> +{ >>>> + brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET; >>>> +} >>>> + >>>> +const struct brw_tracked_state gen7_hw_binding_tables = { >>>> + .dirty = { >>>> + .mesa = 0, >>>> + .brw = BRW_NEW_BATCH, >>>> + }, >>>> + .emit = gen7_enable_hw_binding_tables >>>> +}; >>>> + >>>> /** @} */ >>>> >>>> /** >>>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c >>>> b/src/mesa/drivers/dri/i965/brw_context.c >>>> index ea56859..4a572d1 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_context.c >>>> +++ b/src/mesa/drivers/dri/i965/brw_context.c >>>> @@ -961,6 +961,10 @@ intelDestroyContext(__DRIcontext * driContextPriv) >>>> if (brw->wm.base.scratch_bo) >>>> drm_intel_bo_unreference(brw->wm.base.scratch_bo); >>>> >>>> + gen7_reset_rs_pool_offsets(brw); >>>> + drm_intel_bo_unreference(brw->hw_bt_pool.bo); >>>> + brw->hw_bt_pool.bo = NULL; >>>> + >>>> drm_intel_gem_context_destroy(brw->hw_ctx); >>>> >>>> if (ctx->swrast_context) { >>>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h >>>> b/src/mesa/drivers/dri/i965/brw_context.h >>>> index 3f8e59d..94127b6 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_context.h >>>> +++ b/src/mesa/drivers/dri/i965/brw_context.h >>>> @@ -1404,6 +1404,12 @@ struct brw_context >>>> struct brw_cs_prog_data *prog_data; >>>> } cs; >>>> >>>> + /* RS hardware binding table */ >>>> + struct { >>>> + drm_intel_bo *bo; >>>> + uint32_t next_offset; >>>> + } hw_bt_pool; >>>> + >>>> struct { >>>> uint32_t state_offset; >>>> uint32_t blend_state_offset; >>>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h >>>> b/src/mesa/drivers/dri/i965/brw_state.h >>>> index 987672f..52dea58 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_state.h >>>> +++ b/src/mesa/drivers/dri/i965/brw_state.h >>>> @@ -132,6 +132,7 @@ extern const struct brw_tracked_state gen7_sol_state; >>>> extern 
const struct brw_tracked_state gen7_urb; >>>> extern const struct brw_tracked_state gen7_vs_state; >>>> extern const struct brw_tracked_state gen7_wm_state; >>>> +extern const struct brw_tracked_state gen7_hw_binding_tables; >>>> extern const struct brw_tracked_state haswell_cut_index; >>>> extern const struct brw_tracked_state gen8_blend_state; >>>> extern const struct brw_tracked_state gen8_disable_stages; >>>> @@ -372,6 +373,12 @@ gen7_upload_constant_state(struct brw_context *brw, >>>> const struct brw_stage_state *stage_state, >>>> bool active, unsigned opcode); >>>> >>>> +/* gen7_misc_state.c */ >>>> +void gen7_rs_control(struct brw_context *brw, int enable); >>>> +void gen7_enable_hw_binding_tables(struct brw_context *brw); >>>> +void gen7_disable_hw_binding_tables(struct brw_context *brw); >>>> +void gen7_reset_rs_pool_offsets(struct brw_context *brw); >>>> + >>>> #ifdef __cplusplus >>>> } >>>> #endif >>>> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c >>>> b/src/mesa/drivers/dri/i965/brw_state_upload.c >>>> index 84b0861..a9d64bd 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c >>>> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c >>>> @@ -191,6 +191,8 @@ static const struct brw_tracked_state >>>> *gen7_render_atoms[] = >>>> &gen6_color_calc_state, /* must do before cc unit */ >>>> &gen6_depth_stencil_state, /* must do before cc unit */ >>>> >>>> + &gen7_hw_binding_tables, /* Enable hw-generated binding tables for >>>> Haswell */ >>>> + >>>> &gen6_vs_push_constants, /* Before vs_state */ >>>> &gen6_gs_push_constants, /* Before gs_state */ >>>> &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */ >>>> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c >>>> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c >>>> index a2a3a95..caeb31b 100644 >>>> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c >>>> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c >>>> @@ -32,6 +32,7 @@ >>>> #include "intel_buffers.h" 
>>>> #include "intel_fbo.h" >>>> #include "brw_context.h" >>>> +#include "brw_state.h" >>>> >>>> #include <xf86drm.h> >>>> #include <i915_drm.h> >>>> @@ -379,6 +380,9 @@ _intel_batchbuffer_flush(struct brw_context *brw, >>>> drm_intel_bo_wait_rendering(brw->batch.bo); >>>> } >>>> >>>> + if (brw->gen >= 7) >>>> + gen7_reset_rs_pool_offsets(brw); >>>> + >>>> /* Start a new batch buffer. */ >>>> brw_new_batch(brw); >>>> >>>> -- >>>> 1.9.1 >>>> >>>> _______________________________________________ >>>> mesa-dev mailing list >>>> mesa-dev@lists.freedesktop.org >>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev >>> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev