From: Dave Airlie <airl...@redhat.com> This shifts a bunch of the pipeline-specific calculations into pipeline creation.
This should allow better optimising of the multi vgt calcs --- src/amd/vulkan/radv_pipeline.c | 62 ++++++++++++++++++++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 6 ++++ src/amd/vulkan/si_cmd_buffer.c | 60 ++++------------------------------------ 3 files changed, 73 insertions(+), 55 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index e77f959..ccbe20d 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1960,6 +1960,8 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline) pipeline->graphics.ps_input_cntl_num = ps_offset; } +#define SI_GS_PER_ES 128 + VkResult radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device, @@ -2170,8 +2172,68 @@ radv_pipeline_init(struct radv_pipeline *pipeline, pipeline->graphics.prim_vertex_count.incr = 1; } calculate_tess_state(pipeline, pCreateInfo); + + } + + pipeline->graphics.primgroup_size = 128; + if (radv_pipeline_has_tess(pipeline)) + pipeline->graphics.primgroup_size = pipeline->graphics.tess.num_patches; + else if (radv_pipeline_has_gs(pipeline)) + pipeline->graphics.primgroup_size = 64; + + /* WD_SWITCH_ON_EOP has no effect on GPUs with less than + * 4 shader engines. Set 1 to pass the assertion below. + * The other cases are hardware requirements. 
*/ + if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { + if (pipeline->device->physical_device->rad_info.max_se < 4 || + pipeline->graphics.prim == V_008958_DI_PT_POLYGON || + pipeline->graphics.prim == V_008958_DI_PT_LINELOOP || + pipeline->graphics.prim == V_008958_DI_PT_TRIFAN || + pipeline->graphics.prim == V_008958_DI_PT_TRISTRIP_ADJ || + (pipeline->graphics.prim_restart_enable && + (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10 || + (pipeline->graphics.prim != V_008958_DI_PT_POINTLIST && + pipeline->graphics.prim != V_008958_DI_PT_LINESTRIP && + pipeline->graphics.prim != V_008958_DI_PT_TRISTRIP)))) + pipeline->graphics.cik_wd_switch_on_eop = true; + } + + if (radv_pipeline_has_tess(pipeline)) { + /* SWITCH_ON_EOI must be set if PrimID is used. */ + if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id || + pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.uses_prim_id) + pipeline->graphics.tess_ia_switch_on_eoi = true; + + /* Bug with tessellation and GS on Bonaire and older 2 SE chips. 
*/ + if ((pipeline->device->physical_device->rad_info.family == CHIP_TAHITI || + pipeline->device->physical_device->rad_info.family == CHIP_PITCAIRN || + pipeline->device->physical_device->rad_info.family == CHIP_BONAIRE) && + radv_pipeline_has_gs(pipeline)) + pipeline->graphics.tess_partial_vs_wave = true; + + /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ + if (pipeline->device->has_distributed_tess) { + if (radv_pipeline_has_gs(pipeline)) { + if (pipeline->device->physical_device->rad_info.chip_class <= VI) + pipeline->graphics.partial_es_wave = true; + + if (pipeline->device->physical_device->rad_info.family == CHIP_TONGA || + pipeline->device->physical_device->rad_info.family == CHIP_FIJI || + pipeline->device->physical_device->rad_info.family == CHIP_POLARIS10 || + pipeline->device->physical_device->rad_info.family == CHIP_POLARIS11 || + pipeline->device->physical_device->rad_info.family == CHIP_POLARIS12) + pipeline->graphics.tess_partial_vs_wave = true; + } else { + pipeline->graphics.tess_partial_vs_wave = true; + } + } } + if (radv_pipeline_has_gs(pipeline)) + if (SI_GS_PER_ES / pipeline->graphics.primgroup_size >= pipeline->device->gs_table_depth - 3) + pipeline->graphics.partial_es_wave = true; + + const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index c21b17e..c88f1ff 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1079,8 +1079,14 @@ struct radv_pipeline { uint32_t vgt_shader_stages_en; uint32_t vtx_base_sgpr; uint8_t vtx_emit_num; + uint8_t primgroup_size; struct radv_prim_vertex_count prim_vertex_count; bool can_use_guardband; + bool gs_partial_es_wave; + bool cik_wd_switch_on_eop; + bool tess_ia_switch_on_eoi; + bool tess_partial_vs_wave; + bool partial_es_wave; } graphics; }; diff --git a/src/amd/vulkan/si_cmd_buffer.c 
b/src/amd/vulkan/si_cmd_buffer.c index 47bf553..3464def 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -681,8 +681,6 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class; enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family; struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; - unsigned prim = cmd_buffer->state.pipeline->graphics.prim; - unsigned primgroup_size = 128; /* recommended without a GS */ unsigned max_primgroup_in_wave = 2; /* SWITCH_ON_EOP(0) is always preferable. */ bool wd_switch_on_eop = false; @@ -693,10 +691,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, uint32_t num_prims = 0; bool multi_instances_smaller_than_primgroup; bool instance_less_than_primgroup_size = false; - if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) - primgroup_size = cmd_buffer->state.pipeline->graphics.tess.num_patches; - else if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) - primgroup_size = 64; /* recommended with a GS */ + uint32_t primgroup_size = cmd_buffer->state.pipeline->graphics.primgroup_size; if (instanced_draw || radv_pipeline_has_gs(cmd_buffer->state.pipeline)) { num_prims = radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count, draw_vertex_count); @@ -704,54 +699,13 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, } multi_instances_smaller_than_primgroup = indirect_draw || instance_less_than_primgroup_size; - if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) { - /* SWITCH_ON_EOI must be set if PrimID is used. */ - if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id || - cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.uses_prim_id) - ia_switch_on_eoi = true; - - /* Bug with tessellation and GS on Bonaire and older 2 SE chips. 
*/ - if ((family == CHIP_TAHITI || - family == CHIP_PITCAIRN || - family == CHIP_BONAIRE) && - radv_pipeline_has_gs(cmd_buffer->state.pipeline)) - partial_vs_wave = true; - /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ - if (cmd_buffer->device->has_distributed_tess) { - if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) { - if (chip_class <= VI) - partial_es_wave = true; - - if (family == CHIP_TONGA || - family == CHIP_FIJI || - family == CHIP_POLARIS10 || - family == CHIP_POLARIS11 || - family == CHIP_POLARIS12) - partial_vs_wave = true; - } else { - partial_vs_wave = true; - } - } - } - /* TODO linestipple */ + ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.tess_ia_switch_on_eoi; + partial_vs_wave = cmd_buffer->state.pipeline->graphics.tess_partial_vs_wave; + partial_es_wave = cmd_buffer->state.pipeline->graphics.partial_es_wave; + wd_switch_on_eop = cmd_buffer->state.pipeline->graphics.cik_wd_switch_on_eop; if (chip_class >= CIK) { - /* WD_SWITCH_ON_EOP has no effect on GPUs with less than - * 4 shader engines. Set 1 to pass the assertion below. - * The other cases are hardware requirements. */ - if (info->max_se < 4 || - prim == V_008958_DI_PT_POLYGON || - prim == V_008958_DI_PT_LINELOOP || - prim == V_008958_DI_PT_TRIFAN || - prim == V_008958_DI_PT_TRISTRIP_ADJ || - (cmd_buffer->state.pipeline->graphics.prim_restart_enable && - (family < CHIP_POLARIS10 || - (prim != V_008958_DI_PT_POINTLIST && - prim != V_008958_DI_PT_LINESTRIP && - prim != V_008958_DI_PT_TRISTRIP)))) - wd_switch_on_eop = true; - /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. * We don't know that for indirect drawing, so treat it as * always problematic. */ @@ -793,10 +747,6 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, partial_es_wave = true; if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) { - /* GS requirement. 
*/ - if (SI_GS_PER_ES / primgroup_size >= cmd_buffer->device->gs_table_depth - 3) - partial_es_wave = true; - /* Hw bug with single-primitive instances and SWITCH_ON_EOI * on multi-SE chips. */ if (info->max_se >= 2 && ia_switch_on_eoi && -- 2.9.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev