On Fri, May 27, 2016 at 1:01 PM, Jordan Justen <jordan.l.jus...@intel.com> wrote:
> On 2016-05-20 12:41:04, Jason Ekstrand wrote: > > Instead of blasting it out as part of the pipeline, we put it in the > > command buffer and only blast it out when it's really needed. Since the > > PUSH_CONSTANT_ALLOC commands aren't pipelined, they immediately cause a > > stall which we would like to avoid. > > --- > > src/intel/vulkan/anv_cmd_buffer.c | 1 + > > src/intel/vulkan/anv_pipeline.c | 22 ---------- > > src/intel/vulkan/anv_private.h | 2 +- > > src/intel/vulkan/genX_cmd_buffer.c | 78 > +++++++++++++++++++++++++++++++---- > > src/intel/vulkan/genX_pipeline_util.h | 12 ------ > > 5 files changed, 71 insertions(+), 44 deletions(-) > > > > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > b/src/intel/vulkan/anv_cmd_buffer.c > > index bba24e8..db7793e 100644 > > --- a/src/intel/vulkan/anv_cmd_buffer.c > > +++ b/src/intel/vulkan/anv_cmd_buffer.c > > @@ -130,6 +130,7 @@ anv_cmd_state_reset(struct anv_cmd_buffer > *cmd_buffer) > > state->descriptors_dirty = 0; > > state->push_constants_dirty = 0; > > state->pipeline = NULL; > > + state->push_constant_stages = 0; > > state->restart_index = UINT32_MAX; > > state->dynamic = default_dynamic_state; > > state->need_query_wa = true; > > diff --git a/src/intel/vulkan/anv_pipeline.c > b/src/intel/vulkan/anv_pipeline.c > > index faa0adb..c010f0f 100644 > > --- a/src/intel/vulkan/anv_pipeline.c > > +++ b/src/intel/vulkan/anv_pipeline.c > > @@ -939,28 +939,6 @@ anv_compute_urb_partition(struct anv_pipeline > *pipeline) > > pipeline->urb.start[MESA_SHADER_TESS_EVAL] = push_constant_chunks; > > pipeline->urb.size[MESA_SHADER_TESS_EVAL] = 1; > > pipeline->urb.entries[MESA_SHADER_TESS_EVAL] = 0; > > - > > - const unsigned stages = > > - _mesa_bitcount(pipeline->active_stages & > VK_SHADER_STAGE_ALL_GRAPHICS); > > - unsigned size_per_stage = stages ? (push_constant_kb / stages) : 0; > > - unsigned used_kb = 0; > > - > > - /* Broadwell+ and Haswell gt3 require that the push constant sizes > be in > > - * units of 2KB. Incidentally, these are the same platforms that > have > > - * 32KB worth of push constant space. > > - */ > > - if (push_constant_kb == 32) > > - size_per_stage &= ~1u; > > - > > - for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { > > - pipeline->urb.push_size[i] = > > - (pipeline->active_stages & (1 << i)) ? size_per_stage : 0; > > - used_kb += pipeline->urb.push_size[i]; > > - assert(used_kb <= push_constant_kb); > > - } > > - > > - pipeline->urb.push_size[MESA_SHADER_FRAGMENT] = > > - push_constant_kb - used_kb; > > } > > > > static void > > diff --git a/src/intel/vulkan/anv_private.h > b/src/intel/vulkan/anv_private.h > > index 33cbff9..80e768d 100644 > > --- a/src/intel/vulkan/anv_private.h > > +++ b/src/intel/vulkan/anv_private.h > > @@ -1177,6 +1177,7 @@ struct anv_cmd_state { > > uint32_t restart_index; > > struct anv_vertex_binding > vertex_bindings[MAX_VBS]; > > struct anv_descriptor_set * descriptors[MAX_SETS]; > > + VkShaderStageFlags push_constant_stages; > > struct anv_push_constants * > push_constants[MESA_SHADER_STAGES]; > > struct anv_state > binding_tables[MESA_SHADER_STAGES]; > > struct anv_state > samplers[MESA_SHADER_STAGES]; > > @@ -1411,7 +1412,6 @@ struct anv_pipeline { > > uint32_t > scratch_start[MESA_SHADER_STAGES]; > > uint32_t total_scratch; > > struct { > > - uint8_t > push_size[MESA_SHADER_FRAGMENT + 1]; > > uint32_t > start[MESA_SHADER_GEOMETRY + 1]; > > uint32_t > size[MESA_SHADER_GEOMETRY + 1]; > > uint32_t > entries[MESA_SHADER_GEOMETRY + 1]; > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > b/src/intel/vulkan/genX_cmd_buffer.c > > index ee47c29..f5e3530 100644 > > --- a/src/intel/vulkan/genX_cmd_buffer.c > > +++ b/src/intel/vulkan/genX_cmd_buffer.c > > @@ -274,6 +274,72 @@ void genX(CmdPipelineBarrier)( > > } > > } > > > > +static void > > +cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer) > > +{ > > + VkShaderStageFlags stages = > cmd_buffer->state.pipeline->active_stages; > > + > > + /* In order to avoid thrash, we assume that vertex and fragment > stages > > + * always exist. In the rare case where one is missing *and* the > other > > + * uses push concstants, this may be suboptimal. However, avoiding > stalls > > + * seems more important. > > + */ > > + stages |= VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT; > > + > > + if (stages == cmd_buffer->state.push_constant_stages) > > + return; > > + > > +#if GEN_GEN >= 8 > > + const unsigned push_constant_kb = 32; > > +#elif GEN_IS_HASWELL > > + const unsigned push_constant_kb = cmd_buffer->device->info.gt == 3 > ? 32 : 16; > > +#else > > + const unsigned push_constant_kb = 16; > > +#endif > > I noted that this changed from the old code that just used 32k. I see > that this matches src/mesa/drivers/dri/i965/gen7_urb.c. Maybe worth > noting in the commit message? > I think we just use 32KB at one point, but I fixed that a while ago. > Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com> > Thanks > > > + > > + const unsigned num_stages = > > + _mesa_bitcount(stages & VK_SHADER_STAGE_ALL_GRAPHICS); > > + unsigned size_per_stage = push_constant_kb / num_stages; > > + > > + /* Broadwell+ and Haswell gt3 require that the push constant sizes > be in > > + * units of 2KB. Incidentally, these are the same platforms that > have > > + * 32KB worth of push constant space. > > + */ > > + if (push_constant_kb == 32) > > + size_per_stage &= ~1u; > > + > > + uint32_t kb_used = 0; > > + for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { > > + unsigned push_size = (stages & (1 << i)) ? size_per_stage : 0; > > + anv_batch_emit(&cmd_buffer->batch, > > + GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) { > > + alloc._3DCommandSubOpcode = 18 + i; > > + alloc.ConstantBufferOffset = (push_size > 0) ? kb_used : 0; > > + alloc.ConstantBufferSize = push_size; > > + } > > + kb_used += push_size; > > + } > > + > > + anv_batch_emit(&cmd_buffer->batch, > > + GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), alloc) { > > + alloc.ConstantBufferOffset = kb_used; > > + alloc.ConstantBufferSize = push_constant_kb - kb_used; > > + } > > + > > + cmd_buffer->state.push_constant_stages = stages; > > + > > + /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: > > + * > > + * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to > > + * the next 3DPRIMITIVE command after programming the > > + * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" > > + * > > + * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of > > + * pipeline setup, we need to dirty push constants. > > + */ > > + cmd_buffer->state.push_constants_dirty |= > VK_SHADER_STAGE_ALL_GRAPHICS; > > +} > > + > > static uint32_t > > cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) > > { > > @@ -384,16 +450,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer > *cmd_buffer) > > > > anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); > > > > - /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: > > - * > > - * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to > > - * the next 3DPRIMITIVE command after programming the > > - * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" > > - * > > - * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of > > - * pipeline setup, we need to dirty push constants. > > + /* If the pipeline changed, we may need to re-allocate push > constant > > + * space in the URB. > > */ > > - cmd_buffer->state.push_constants_dirty |= > VK_SHADER_STAGE_ALL_GRAPHICS; > > + cmd_buffer_alloc_push_constants(cmd_buffer); > > } > > > > #if GEN_GEN <= 7 > > diff --git a/src/intel/vulkan/genX_pipeline_util.h > b/src/intel/vulkan/genX_pipeline_util.h > > index ecbe436..2ed55e0 100644 > > --- a/src/intel/vulkan/genX_pipeline_util.h > > +++ b/src/intel/vulkan/genX_pipeline_util.h > > @@ -205,18 +205,6 @@ emit_urb_setup(struct anv_pipeline *pipeline) > > } > > #endif > > > > - unsigned push_start = 0; > > - for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) { > > - unsigned push_size = pipeline->urb.push_size[i]; > > - anv_batch_emit(&pipeline->batch, > > - GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) { > > - alloc._3DCommandSubOpcode = 18 + i; > > - alloc.ConstantBufferOffset = (push_size > 0) ? push_start : 0; > > - alloc.ConstantBufferSize = push_size; > > - } > > - push_start += pipeline->urb.push_size[i]; > > - } > > - > > for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { > > anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), urb) { > > urb._3DCommandSubOpcode = 48 + i; > > -- > > 2.5.0.400.gff86faf > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev