On Mon, Jan 2, 2017 at 7:57 PM, Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> wrote: > Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> > --- > src/amd/vulkan/radv_cmd_buffer.c | 35 +++++++++++++ > src/amd/vulkan/radv_device.c | 71 > +++++++++++++++++++++++---- > src/amd/vulkan/radv_private.h | 5 ++ > src/amd/vulkan/radv_radeon_winsys.h | 2 + > src/amd/vulkan/si_cmd_buffer.c | 5 ++ > src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 30 +++++++++++ > 6 files changed, 139 insertions(+), 9 deletions(-) > > diff --git a/src/amd/vulkan/radv_cmd_buffer.c > b/src/amd/vulkan/radv_cmd_buffer.c > index fdb35a0060..651b1dd452 100644 > --- a/src/amd/vulkan/radv_cmd_buffer.c > +++ b/src/amd/vulkan/radv_cmd_buffer.c > @@ -32,6 +32,8 @@ > #include "vk_format.h" > #include "radv_meta.h" > > +#include "ac_debug.h" > + > static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, > struct radv_image *image, > VkImageLayout src_layout, > @@ -272,6 +274,32 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer > *cmd_buffer, > return true; > } > > +void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) > +{ > + struct radv_device *device = cmd_buffer->device; > + struct radeon_winsys_cs *cs = cmd_buffer->cs; > + uint64_t va; > + > + if (!device->trace_bo) > + return; > + > + va = device->ws->buffer_get_va(device->trace_bo); > + > + MAYBE_UNUSED unsigned cdw_max = > radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7); > + > + ++cmd_buffer->state.trace_id; > + device->ws->cs_add_buffer(cs, device->trace_bo, 8); > + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); > + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | > + S_370_WR_CONFIRM(1) | > + S_370_ENGINE_SEL(V_370_ME)); > + radeon_emit(cs, va); > + radeon_emit(cs, va >> 32); > + radeon_emit(cs, cmd_buffer->state.trace_id); > + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); > + radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id)); > +} > + > static void > radv_emit_graphics_blend_state(struct 
radv_cmd_buffer *cmd_buffer, > struct radv_pipeline *pipeline) > @@ -1929,6 +1957,8 @@ void radv_CmdDraw( > S_0287F0_USE_OPAQUE(0)); > > assert(cmd_buffer->cs->cdw <= cdw_max); > + > + radv_cmd_buffer_trace_emit(cmd_buffer); > } > > static void radv_emit_primitive_reset_index(struct radv_cmd_buffer > *cmd_buffer) > @@ -1984,6 +2014,7 @@ void radv_CmdDrawIndexed( > radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA); > > assert(cmd_buffer->cs->cdw <= cdw_max); > + radv_cmd_buffer_trace_emit(cmd_buffer); > } > > static void > @@ -2035,6 +2066,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer > *cmd_buffer, > radeon_emit(cs, count_va >> 32); > radeon_emit(cs, stride); /* stride */ > radeon_emit(cs, di_src_sel); > + radv_cmd_buffer_trace_emit(cmd_buffer); > } > > static void > @@ -2188,6 +2220,7 @@ void radv_CmdDispatch( > radeon_emit(cmd_buffer->cs, 1); > > assert(cmd_buffer->cs->cdw <= cdw_max); > + radv_cmd_buffer_trace_emit(cmd_buffer); > } > > void radv_CmdDispatchIndirect( > @@ -2239,6 +2272,7 @@ void radv_CmdDispatchIndirect( > } > > assert(cmd_buffer->cs->cdw <= cdw_max); > + radv_cmd_buffer_trace_emit(cmd_buffer); > } > > void radv_unaligned_dispatch( > @@ -2292,6 +2326,7 @@ void radv_unaligned_dispatch( > S_00B800_PARTIAL_TG_EN(1)); > > assert(cmd_buffer->cs->cdw <= cdw_max); > + radv_cmd_buffer_trace_emit(cmd_buffer); > } > > void radv_CmdEndRenderPass( > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index e57a419cfa..54cedc2943 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -760,16 +760,34 @@ VkResult radv_CreateDevice( > device->ws->cs_finalize(device->empty_cs[family]); > } > > + if (false) { > + device->trace_bo = device->ws->buffer_create(device->ws, > 4096, 8, > + > RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS); > + if (!device->trace_bo) > + goto fail; > + > + device->trace_id_ptr = > device->ws->buffer_map(device->trace_bo); > + if (!device->trace_id_ptr) > + goto fail; > + } > + > 
*pDevice = radv_device_to_handle(device); > return VK_SUCCESS; > > fail: > + if (device->trace_bo) > + device->ws->buffer_destroy(device->trace_bo); > + > for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { > for (unsigned q = 0; q < device->queue_count[i]; q++) > radv_queue_finish(&device->queues[i][q]); > if (device->queue_count[i]) > vk_free(&device->alloc, device->queues[i]); > } > + > + if (device->hw_ctx) > + device->ws->ctx_destroy(device->hw_ctx); > + > vk_free(&device->alloc, device); > return result; > } > @@ -780,6 +798,9 @@ void radv_DestroyDevice( > { > RADV_FROM_HANDLE(radv_device, device, _device); > > + if (device->trace_bo) > + device->ws->buffer_destroy(device->trace_bo); > + > device->ws->ctx_destroy(device->hw_ctx); > for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { > for (unsigned q = 0; q < device->queue_count[i]; q++) > @@ -880,10 +901,12 @@ VkResult radv_QueueSubmit( > struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; > struct radeon_winsys_ctx *ctx = queue->device->hw_ctx; > int ret; > + uint32_t max_cs_submission = queue->device->trace_bo ? 
1 : UINT32_MAX; > > for (uint32_t i = 0; i < submitCount; i++) { > struct radeon_winsys_cs **cs_array; > bool can_patch = true; > + uint32_t advance; > > if (!pSubmits[i].commandBufferCount) > continue; > @@ -900,15 +923,45 @@ VkResult radv_QueueSubmit( > if ((cmd_buffer->usage_flags & > VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) > can_patch = false; > } > - ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, > cs_array, > - > pSubmits[i].commandBufferCount, > - (struct radeon_winsys_sem > **)pSubmits[i].pWaitSemaphores, > - > pSubmits[i].waitSemaphoreCount, > - (struct radeon_winsys_sem > **)pSubmits[i].pSignalSemaphores, > - > pSubmits[i].signalSemaphoreCount, > - can_patch, base_fence); > - if (ret) > - radv_loge("failed to submit CS %d\n", i); > + > + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += > advance) { > + advance = MIN2(max_cs_submission, > + pSubmits[i].commandBufferCount - j); > + bool b = j == 0; > + bool e = j + advance == > pSubmits[i].commandBufferCount; > + > + if (queue->device->trace_bo) > + *queue->device->trace_id_ptr = 0; > + > + ret = queue->device->ws->cs_submit(ctx, > queue->queue_idx, cs_array, > + > pSubmits[i].commandBufferCount, > + (struct > radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, > + b ? > pSubmits[i].waitSemaphoreCount : 0, > + (struct > radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, > + e ? > pSubmits[i].signalSemaphoreCount : 0, > + can_patch, > base_fence); > + > + if (ret) { > + radv_loge("failed to submit CS %d\n", i); > + abort(); > + } > + if (queue->device->trace_bo) { > + bool success = > queue->device->ws->ctx_wait_idle( > + queue->device->hw_ctx, > + > radv_queue_family_to_ring( > + > queue->queue_family_index), > + queue->queue_idx); > + > + if (!success) { /* Hang */ > + FILE *f = fopen("/home/bas/hang.log", > "w");
Just noticed that I forgot to replace this hardcoded path. Will send a v2 soon. > + fprintf(f, "Trace ID: %x\n", > *queue->device->trace_id_ptr); > + > queue->device->ws->cs_dump(cs_array[j], f, > + > *queue->device->trace_id_ptr); > + fclose(f); > + abort(); > + } > + } > + } > free(cs_array); > } > > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > index f76d38dba6..9bae7494a9 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -487,6 +487,9 @@ struct radv_device { > float sample_locations_4x[4][2]; > float sample_locations_8x[8][2]; > float sample_locations_16x[16][2]; > + > + struct radeon_winsys_bo *trace_bo; > + uint32_t *trace_id_ptr; > }; > > struct radv_device_memory { > @@ -677,6 +680,7 @@ struct radv_cmd_state { > unsigned active_occlusion_queries; > float offset_scale; > uint32_t descriptors_dirty; > + uint32_t trace_id; > }; > > struct radv_cmd_pool { > @@ -771,6 +775,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer > *cmd_buffer, > void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, > struct radeon_winsys_bo *bo, > uint64_t offset, uint64_t size, uint32_t value); > +void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer); > > /* > * Takes x,y,z as exact numbers of invocations, instead of blocks. 
> diff --git a/src/amd/vulkan/radv_radeon_winsys.h > b/src/amd/vulkan/radv_radeon_winsys.h > index 4b738b8cf4..a0b5092e30 100644 > --- a/src/amd/vulkan/radv_radeon_winsys.h > +++ b/src/amd/vulkan/radv_radeon_winsys.h > @@ -319,6 +319,8 @@ struct radeon_winsys { > void (*cs_execute_secondary)(struct radeon_winsys_cs *parent, > struct radeon_winsys_cs *child); > > + void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t > trace_id); > + > int (*surface_init)(struct radeon_winsys *ws, > struct radeon_surf *surf); > > diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c > index e3f883f50b..a483ad9fd3 100644 > --- a/src/amd/vulkan/si_cmd_buffer.c > +++ b/src/amd/vulkan/si_cmd_buffer.c > @@ -718,6 +718,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) > } > } > > + if (cmd_buffer->state.flush_bits) > + radv_cmd_buffer_trace_emit(cmd_buffer); > cmd_buffer->state.flush_bits = 0; > } > > @@ -780,6 +782,8 @@ static void si_emit_cp_dma_copy_buffer(struct > radv_cmd_buffer *cmd_buffer, > radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); > radeon_emit(cs, 0); > } > + > + radv_cmd_buffer_trace_emit(cmd_buffer); > } > > /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. 
> */ > @@ -820,6 +824,7 @@ static void si_emit_cp_dma_clear_buffer(struct > radv_cmd_buffer *cmd_buffer, > radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); > radeon_emit(cs, 0); > } > + radv_cmd_buffer_trace_emit(cmd_buffer); > } > > static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t > byte_count, > diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > index b24aa99749..99b16192bc 100644 > --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > @@ -27,6 +27,7 @@ > #include <amdgpu_drm.h> > #include <assert.h> > > +#include "ac_debug.h" > #include "amdgpu_id.h" > #include "radv_radeon_winsys.h" > #include "radv_amdgpu_cs.h" > @@ -775,6 +776,34 @@ static int radv_amdgpu_winsys_cs_submit(struct > radeon_winsys_ctx *_ctx, > return ret; > } > > + > +static void *radv_amdgpu_winsys_get_cpu_addr(struct radv_amdgpu_cs *cs, > uint64_t addr) > +{ > + void *ret = NULL; > + for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) { > + struct radv_amdgpu_winsys_bo *bo; > + > + bo = (struct radv_amdgpu_winsys_bo*) > + (i == cs->num_old_ib_buffers ? 
cs->ib_buffer : > cs->old_ib_buffers[i]); > + if (addr >= bo->va && addr - bo->va < bo->size) { > + if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0) > + return (char *)ret + (addr - bo->va); > + } > + } > + return ret; > +} > + > +static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs, > + FILE* file, > + uint32_t trace_id) > +{ > + struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs; > + > + ac_parse_ib(file, > + radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address), > + cs->ib.size, trace_id, "main IB", > cs->ws->info.chip_class); > +} > + > static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys > *_ws) > { > struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); > @@ -850,6 +879,7 @@ void radv_amdgpu_cs_init_functions(struct > radv_amdgpu_winsys *ws) > ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer; > ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary; > ws->base.cs_submit = radv_amdgpu_winsys_cs_submit; > + ws->base.cs_dump = radv_amdgpu_winsys_cs_dump; > ws->base.create_fence = radv_amdgpu_create_fence; > ws->base.destroy_fence = radv_amdgpu_destroy_fence; > ws->base.create_sem = radv_amdgpu_create_sem; > -- > 2.11.0 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev