On 31 March 2017 at 16:59, Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> wrote: > On Thu, Mar 30, 2017 at 10:00 AM, Dave Airlie <airl...@gmail.com> wrote: >> From: Dave Airlie <airl...@redhat.com> >> >> This patch adds support for the offchip rings for storing >> tessellation factors and attribute data. >> >> It includes the register setup for the TF ring >> >> Signed-off-by: Dave Airlie <airl...@redhat.com> >> --- >> src/amd/vulkan/radv_cmd_buffer.c | 6 ++ >> src/amd/vulkan/radv_device.c | 210 >> ++++++++++++++++++++++++++++++++++++--- >> src/amd/vulkan/radv_private.h | 4 + >> 3 files changed, 207 insertions(+), 13 deletions(-) >> >> diff --git a/src/amd/vulkan/radv_cmd_buffer.c >> b/src/amd/vulkan/radv_cmd_buffer.c >> index dbd74de..70f6fad 100644 >> --- a/src/amd/vulkan/radv_cmd_buffer.c >> +++ b/src/amd/vulkan/radv_cmd_buffer.c >> @@ -221,6 +221,7 @@ static void radv_reset_cmd_buffer(struct >> radv_cmd_buffer *cmd_buffer) >> cmd_buffer->compute_scratch_size_needed = 0; >> cmd_buffer->esgs_ring_size_needed = 0; >> cmd_buffer->gsvs_ring_size_needed = 0; >> + cmd_buffer->tess_rings_needed = false; >> >> if (cmd_buffer->upload.upload_bo) >> cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, >> @@ -1896,6 +1897,9 @@ void radv_CmdBindPipeline( >> if (pipeline->graphics.gsvs_ring_size > >> cmd_buffer->gsvs_ring_size_needed) >> cmd_buffer->gsvs_ring_size_needed = >> pipeline->graphics.gsvs_ring_size; >> >> + if (radv_pipeline_has_tess(pipeline)) >> + cmd_buffer->tess_rings_needed = true; >> + >> if (radv_pipeline_has_gs(pipeline)) { >> struct ac_userdata_info *loc = >> radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY, >> >> AC_UD_SCRATCH_RING_OFFSETS); >> @@ -2063,6 +2067,8 @@ void radv_CmdExecuteCommands( >> primary->esgs_ring_size_needed = >> secondary->esgs_ring_size_needed; >> if (secondary->gsvs_ring_size_needed > >> primary->gsvs_ring_size_needed) >> primary->gsvs_ring_size_needed = >> secondary->gsvs_ring_size_needed; >> + if 
(secondary->tess_rings_needed) >> + primary->tess_rings_needed = true; >> >> if (secondary->ring_offsets_idx != -1) { >> if (primary->ring_offsets_idx == -1) >> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c >> index fe531e1..b75d76b 100644 >> --- a/src/amd/vulkan/radv_device.c >> +++ b/src/amd/vulkan/radv_device.c >> @@ -845,6 +845,10 @@ radv_queue_finish(struct radv_queue *queue) >> queue->device->ws->buffer_destroy(queue->esgs_ring_bo); >> if (queue->gsvs_ring_bo) >> queue->device->ws->buffer_destroy(queue->gsvs_ring_bo); >> + if (queue->tess_factor_ring_bo) >> + >> queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo); >> + if (queue->tess_offchip_ring_bo) >> + >> queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo); >> if (queue->compute_scratch_bo) >> queue->device->ws->buffer_destroy(queue->compute_scratch_bo); >> } >> @@ -1182,20 +1186,29 @@ static void radv_dump_trace(struct radv_device >> *device, >> } >> >> static void >> -fill_geom_rings(struct radv_queue *queue, >> - uint32_t *map, >> - uint32_t esgs_ring_size, >> - struct radeon_winsys_bo *esgs_ring_bo, >> - uint32_t gsvs_ring_size, >> - struct radeon_winsys_bo *gsvs_ring_bo) >> +fill_geom_tess_rings(struct radv_queue *queue, >> + uint32_t *map, >> + uint32_t esgs_ring_size, >> + struct radeon_winsys_bo *esgs_ring_bo, >> + uint32_t gsvs_ring_size, >> + struct radeon_winsys_bo *gsvs_ring_bo, >> + uint32_t tess_factor_ring_size, >> + struct radeon_winsys_bo *tess_factor_ring_bo, >> + uint32_t tess_offchip_ring_size, >> + struct radeon_winsys_bo *tess_offchip_ring_bo) >> { >> uint64_t esgs_va = 0, gsvs_va = 0; >> + uint64_t tess_factor_va = 0, tess_offchip_va = 0; >> uint32_t *desc = &map[4]; >> >> if (esgs_ring_bo) >> esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo); >> if (gsvs_ring_bo) >> gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo); >> + if (tess_factor_ring_bo) >> + tess_factor_va = >> 
queue->device->ws->buffer_get_va(tess_factor_ring_bo); >> + if (tess_offchip_ring_bo) >> + tess_offchip_va = >> queue->device->ws->buffer_get_va(tess_offchip_ring_bo); >> >> /* stride 0, num records - size, add tid, swizzle, elsize4, >> index stride 64 */ >> @@ -1270,6 +1283,88 @@ fill_geom_rings(struct radv_queue *queue, >> S_008F0C_ELEMENT_SIZE(1) | >> S_008F0C_INDEX_STRIDE(1) | >> S_008F0C_ADD_TID_ENABLE(true); >> + desc += 4; >> + >> + desc[0] = tess_factor_va; >> + desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) | >> + S_008F04_STRIDE(0) | >> + S_008F04_SWIZZLE_ENABLE(false); >> + desc[2] = tess_factor_ring_size; >> + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | >> + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | >> + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | >> + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | >> + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | >> + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | >> + S_008F0C_ELEMENT_SIZE(0) | >> + S_008F0C_INDEX_STRIDE(0) | >> + S_008F0C_ADD_TID_ENABLE(false); >> + desc += 4; >> + >> + desc[0] = tess_offchip_va; >> + desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | >> + S_008F04_STRIDE(0) | >> + S_008F04_SWIZZLE_ENABLE(false); >> + desc[2] = tess_offchip_ring_size; >> + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | >> + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | >> + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | >> + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | >> + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | >> + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | >> + S_008F0C_ELEMENT_SIZE(0) | >> + S_008F0C_INDEX_STRIDE(0) | >> + S_008F0C_ADD_TID_ENABLE(false); >> +} >> + >> +static unsigned >> +radv_get_hs_offchip_param(struct radv_device *device, uint32_t >> *max_offchip_buffers_p) >> +{ >> + bool double_offchip_buffers = >> device->physical_device->rad_info.chip_class >= CIK && >> + device->physical_device->rad_info.family != CHIP_CARRIZO && >> + 
device->physical_device->rad_info.family != CHIP_STONEY; >> + unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : >> 64; >> + unsigned max_offchip_buffers = max_offchip_buffers_per_se * >> + device->physical_device->rad_info.max_se; >> + unsigned offchip_granularity; >> + unsigned hs_offchip_param; >> + switch (device->tess_offchip_block_dw_size) { >> + default: >> + assert(0); >> + /* fall through */ >> + case 8192: >> + offchip_granularity = V_03093C_X_8K_DWORDS; >> + break; >> + case 4096: >> + offchip_granularity = V_03093C_X_4K_DWORDS; >> + break; >> + } >> + >> + switch (device->physical_device->rad_info.chip_class) { >> + case SI: >> + max_offchip_buffers = MIN2(max_offchip_buffers, 126); >> + break; >> + case CIK: >> + max_offchip_buffers = MIN2(max_offchip_buffers, 508); >> + break; >> + case VI: >> + default: >> + max_offchip_buffers = MIN2(max_offchip_buffers, 512); >> + break; >> + } >> + >> + *max_offchip_buffers_p = max_offchip_buffers; >> + if (device->physical_device->rad_info.chip_class >= CIK) { >> + if (device->physical_device->rad_info.chip_class >= VI) >> + --max_offchip_buffers; >> + hs_offchip_param = >> + S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | >> + S_03093C_OFFCHIP_GRANULARITY(offchip_granularity); >> + } else { >> + hs_offchip_param = >> + S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers); >> + } >> + return hs_offchip_param; >> } >> >> static VkResult >> @@ -1278,6 +1373,7 @@ radv_get_preamble_cs(struct radv_queue *queue, >> uint32_t compute_scratch_size, >> uint32_t esgs_ring_size, >> uint32_t gsvs_ring_size, >> + bool needs_tess_rings, >> struct radeon_winsys_cs **initial_preamble_cs, >> struct radeon_winsys_cs **continue_preamble_cs) >> { >> @@ -1286,12 +1382,32 @@ radv_get_preamble_cs(struct radv_queue *queue, >> struct radeon_winsys_bo *compute_scratch_bo = NULL; >> struct radeon_winsys_bo *esgs_ring_bo = NULL; >> struct radeon_winsys_bo *gsvs_ring_bo = NULL; >> + struct radeon_winsys_bo *tess_factor_ring_bo 
= NULL; >> + struct radeon_winsys_bo *tess_offchip_ring_bo = NULL; >> struct radeon_winsys_cs *dest_cs[2] = {0}; >> + bool add_tess_rings = false; >> + unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0; >> + unsigned max_offchip_buffers; >> + unsigned hs_offchip_param = 0; >> + if (!queue->has_tess_rings) { >> + if (needs_tess_rings) >> + add_tess_rings = true; >> + } >> + >> + if (add_tess_rings) { >> + tess_factor_ring_size = 32768 * >> queue->device->physical_device->rad_info.max_se; >> + hs_offchip_param = radv_get_hs_offchip_param(queue->device, >> + >> &max_offchip_buffers); > This only sets hs_offchip_param when the tess rings are added. We also > need it if the tess rings already exist but we reemit the preamble due > to other reasons.
Yes, there is a later patch that fixes that; I worked it out today. I'll squash it in here. Dave. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev