v2:
  - Now use the filename specified by RADV_TRACE_FILE env var.
  - Use the same var to enable tracing.

I thought we might as well always set the filename explicitly
instead of having an arbitrary default; at that point we no
longer need a separate variable to enable the feature.

Signed-off-by: Bas Nieuwenhuizen <ba...@google.com>
---
 src/amd/vulkan/radv_cmd_buffer.c              | 35 ++++++++++++
 src/amd/vulkan/radv_device.c                  | 82 ++++++++++++++++++++++++---
 src/amd/vulkan/radv_private.h                 |  5 ++
 src/amd/vulkan/radv_radeon_winsys.h           |  2 +
 src/amd/vulkan/si_cmd_buffer.c                |  5 ++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 30 ++++++++++
 6 files changed, 150 insertions(+), 9 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index fdb35a0060..651b1dd452 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -32,6 +32,8 @@
 #include "vk_format.h"
 #include "radv_meta.h"
 
+#include "ac_debug.h"
+
 static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_image *image,
                                         VkImageLayout src_layout,
@@ -272,6 +274,32 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer 
*cmd_buffer,
        return true;
 }
 
+void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
+{
+       struct radv_device *device = cmd_buffer->device;
+       struct radeon_winsys_cs *cs = cmd_buffer->cs;
+       uint64_t va;
+       /* Emit a trace point into the CS; a no-op when the device has no trace BO. */
+       if (!device->trace_bo)
+               return;
+
+       va = device->ws->buffer_get_va(device->trace_bo);
+       /* Reserve room for the 7 dwords emitted below. */
+       MAYBE_UNUSED unsigned cdw_max = 
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
+       /* Bump the per-command-buffer id, then emit it via WRITE_DATA addressed at the trace BO. */
+       ++cmd_buffer->state.trace_id;
+       device->ws->cs_add_buffer(cs, device->trace_bo, 8);
+       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+       radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+                   S_370_WR_CONFIRM(1) |
+                   S_370_ENGINE_SEL(V_370_ME));
+       radeon_emit(cs, va); /* low 32 bits of the trace BO address */
+       radeon_emit(cs, va >> 32); /* high 32 bits of the trace BO address */
+       radeon_emit(cs, cmd_buffer->state.trace_id);
+       radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* NOP carrying the encoded id; presumably a marker for the IB parser - TODO confirm */
+       radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
+}
+
 static void
 radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
                               struct radv_pipeline *pipeline)
@@ -1929,6 +1957,8 @@ void radv_CmdDraw(
                    S_0287F0_USE_OPAQUE(0));
 
        assert(cmd_buffer->cs->cdw <= cdw_max);
+
+       radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
@@ -1984,6 +2014,7 @@ void radv_CmdDrawIndexed(
        radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
 
        assert(cmd_buffer->cs->cdw <= cdw_max);
+       radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 static void
@@ -2035,6 +2066,7 @@ radv_emit_indirect_draw(struct radv_cmd_buffer 
*cmd_buffer,
        radeon_emit(cs, count_va >> 32);
        radeon_emit(cs, stride); /* stride */
        radeon_emit(cs, di_src_sel);
+       radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 static void
@@ -2188,6 +2220,7 @@ void radv_CmdDispatch(
        radeon_emit(cmd_buffer->cs, 1);
 
        assert(cmd_buffer->cs->cdw <= cdw_max);
+       radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 void radv_CmdDispatchIndirect(
@@ -2239,6 +2272,7 @@ void radv_CmdDispatchIndirect(
        }
 
        assert(cmd_buffer->cs->cdw <= cdw_max);
+       radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 void radv_unaligned_dispatch(
@@ -2292,6 +2326,7 @@ void radv_unaligned_dispatch(
                                    S_00B800_PARTIAL_TG_EN(1));
 
        assert(cmd_buffer->cs->cdw <= cdw_max);
+       radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 void radv_CmdEndRenderPass(
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index e57a419cfa..ef8ca1a375 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -760,16 +760,34 @@ VkResult radv_CreateDevice(
                device->ws->cs_finalize(device->empty_cs[family]);
        }
 
+       if (getenv("RADV_TRACE_FILE")) {
+               device->trace_bo = device->ws->buffer_create(device->ws, 4096, 
8,
+                                                            
RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
+               if (!device->trace_bo)
+                       goto fail;
+
+               device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
+               if (!device->trace_id_ptr)
+                       goto fail;
+       }
+
        *pDevice = radv_device_to_handle(device);
        return VK_SUCCESS;
 
 fail:
+       if (device->trace_bo)
+               device->ws->buffer_destroy(device->trace_bo);
+
        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
                        radv_queue_finish(&device->queues[i][q]);
                if (device->queue_count[i])
                        vk_free(&device->alloc, device->queues[i]);
        }
+
+       if (device->hw_ctx)
+               device->ws->ctx_destroy(device->hw_ctx);
+
        vk_free(&device->alloc, device);
        return result;
 }
@@ -780,6 +798,9 @@ void radv_DestroyDevice(
 {
        RADV_FROM_HANDLE(radv_device, device, _device);
 
+       if (device->trace_bo)
+               device->ws->buffer_destroy(device->trace_bo);
+
        device->ws->ctx_destroy(device->hw_ctx);
        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
@@ -869,6 +890,21 @@ void radv_GetDeviceQueue(
        *pQueue = 
radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
 }
 
+static void radv_dump_trace(struct radv_device *device,
+                           struct radeon_winsys_cs *cs)
+{
+       const char *filename = getenv("RADV_TRACE_FILE"); /* dump target comes from the environment */
+       FILE *f = fopen(filename, "w"); /* NOTE(review): filename is not NULL-checked before use */
+       if (!f) {
+               fprintf(stderr, "Failed to write trace dump to %s\n", filename);
+               return;
+       }
+       /* Record the value currently in the trace BO mapping, then dump the CS with that id. */
+       fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
+       device->ws->cs_dump(cs, f, *device->trace_id_ptr);
+       fclose(f);
+}
+
 VkResult radv_QueueSubmit(
        VkQueue                                     _queue,
        uint32_t                                    submitCount,
@@ -880,10 +916,12 @@ VkResult radv_QueueSubmit(
        struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
        struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
        int ret;
+       uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
 
        for (uint32_t i = 0; i < submitCount; i++) {
                struct radeon_winsys_cs **cs_array;
                bool can_patch = true;
+               uint32_t advance;
 
                if (!pSubmits[i].commandBufferCount)
                        continue;
@@ -900,15 +938,41 @@ VkResult radv_QueueSubmit(
                        if ((cmd_buffer->usage_flags & 
VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
                                can_patch = false;
                }
-               ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, 
cs_array,
-                                                  
pSubmits[i].commandBufferCount,
-                                                  (struct radeon_winsys_sem 
**)pSubmits[i].pWaitSemaphores,
-                                                  
pSubmits[i].waitSemaphoreCount,
-                                                  (struct radeon_winsys_sem 
**)pSubmits[i].pSignalSemaphores,
-                                                  
pSubmits[i].signalSemaphoreCount,
-                                                  can_patch, base_fence);
-               if (ret)
-                       radv_loge("failed to submit CS %d\n", i);
+
+               for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
+                       advance = MIN2(max_cs_submission,
+                                      pSubmits[i].commandBufferCount - j);
+                       bool b = j == 0; /* first batch: the only one that waits on semaphores */
+                       bool e = j + advance == pSubmits[i].commandBufferCount; /* last batch: the only one that signals */
+
+                       if (queue->device->trace_bo)
+                               *queue->device->trace_id_ptr = 0;
+                       /* Submit only the current batch [j, j + advance), never the whole array again. */
+                       ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
+                                                       advance,
+                                                       (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
+                                                       b ? pSubmits[i].waitSemaphoreCount : 0,
+                                                       (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
+                                                       e ? pSubmits[i].signalSemaphoreCount : 0,
+                                                       can_patch, base_fence);
+
+                       if (ret) {
+                               radv_loge("failed to submit CS %d\n", i);
+                               abort();
+                       }
+                       if (queue->device->trace_bo) { /* tracing: wait for each batch so a hang maps to cs_array[j] */
+                               bool success = queue->device->ws->ctx_wait_idle(
+                                                       queue->device->hw_ctx,
+                                                       radv_queue_family_to_ring(
+                                                               queue->queue_family_index),
+                                                       queue->queue_idx);
+
+                               if (!success) { /* Hang */
+                                       radv_dump_trace(queue->device, cs_array[j]);
+                                       abort();
+                               }
+                       }
+               }
                free(cs_array);
        }
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index f76d38dba6..9bae7494a9 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -487,6 +487,9 @@ struct radv_device {
        float sample_locations_4x[4][2];
        float sample_locations_8x[8][2];
        float sample_locations_16x[16][2];
+
+       struct radeon_winsys_bo                      *trace_bo;
+       uint32_t                                     *trace_id_ptr;
 };
 
 struct radv_device_memory {
@@ -677,6 +680,7 @@ struct radv_cmd_state {
        unsigned                                     active_occlusion_queries;
        float                                        offset_scale;
        uint32_t                                      descriptors_dirty;
+       uint32_t                                      trace_id;
 };
 
 struct radv_cmd_pool {
@@ -771,6 +775,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer 
*cmd_buffer,
 void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
                      struct radeon_winsys_bo *bo,
                      uint64_t offset, uint64_t size, uint32_t value);
+void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
 
 /*
  * Takes x,y,z as exact numbers of invocations, instead of blocks.
diff --git a/src/amd/vulkan/radv_radeon_winsys.h 
b/src/amd/vulkan/radv_radeon_winsys.h
index 4b738b8cf4..a0b5092e30 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -319,6 +319,8 @@ struct radeon_winsys {
        void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
                                    struct radeon_winsys_cs *child);
 
+       void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t 
trace_id);
+
        int (*surface_init)(struct radeon_winsys *ws,
                            struct radeon_surf *surf);
 
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index e3f883f50b..a483ad9fd3 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -718,6 +718,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
                }
        }
 
+       if (cmd_buffer->state.flush_bits)
+               radv_cmd_buffer_trace_emit(cmd_buffer);
        cmd_buffer->state.flush_bits = 0;
 }
 
@@ -780,6 +782,8 @@ static void si_emit_cp_dma_copy_buffer(struct 
radv_cmd_buffer *cmd_buffer,
                radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
                radeon_emit(cs, 0);
        }
+
+       radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
@@ -820,6 +824,7 @@ static void si_emit_cp_dma_clear_buffer(struct 
radv_cmd_buffer *cmd_buffer,
                radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
                radeon_emit(cs, 0);
        }
+       radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t 
byte_count,
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index b24aa99749..99b16192bc 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -27,6 +27,7 @@
 #include <amdgpu_drm.h>
 #include <assert.h>
 
+#include "ac_debug.h"
 #include "amdgpu_id.h"
 #include "radv_radeon_winsys.h"
 #include "radv_amdgpu_cs.h"
@@ -775,6 +776,34 @@ static int radv_amdgpu_winsys_cs_submit(struct 
radeon_winsys_ctx *_ctx,
        return ret;
 }
 
+/* Find the IB buffer of cs whose VA range contains addr and return a CPU pointer to addr. */
+static void *radv_amdgpu_winsys_get_cpu_addr(struct radv_amdgpu_cs *cs, 
uint64_t addr)
+{
+       void *ret = NULL; /* returned unchanged when no buffer matches */
+       for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) { /* the extra last iteration checks the current ib_buffer */
+               struct radv_amdgpu_winsys_bo *bo;
+
+               bo = (struct radv_amdgpu_winsys_bo*)
+                      (i == cs->num_old_ib_buffers ? cs->ib_buffer : 
cs->old_ib_buffers[i]);
+               if (addr >= bo->va && addr - bo->va < bo->size) { /* addr lies within [va, va + size) */
+                       if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
+                               return (char *)ret + (addr - bo->va); /* same offset within the CPU mapping */
+               }
+       }
+       return ret;
+}
+
+static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
+                                       FILE* file,
+                                       uint32_t trace_id)
+{
+       struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+       /* Hand the main IB (via its CPU address) to ac_parse_ib with file and trace_id. NOTE(review): the address lookup result is not NULL-checked. */
+       ac_parse_ib(file,
+                   radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
+                   cs->ib.size, trace_id,  "main IB", cs->ws->info.chip_class);
+}
+
 static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys 
*_ws)
 {
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
@@ -850,6 +879,7 @@ void radv_amdgpu_cs_init_functions(struct 
radv_amdgpu_winsys *ws)
        ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
        ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
        ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
+       ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
        ws->base.create_fence = radv_amdgpu_create_fence;
        ws->base.destroy_fence = radv_amdgpu_destroy_fence;
        ws->base.create_sem = radv_amdgpu_create_sem;
-- 
2.11.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to