From: Marek Olšák <marek.ol...@amd.com> --- src/amd/common/sid.h | 11 +++++++++-- src/amd/vulkan/radv_cmd_buffer.c | 6 +++--- src/amd/vulkan/radv_query.c | 8 ++++---- src/gallium/drivers/radeonsi/si_compute.c | 2 +- src/gallium/drivers/radeonsi/si_perfcounter.c | 6 +++--- src/gallium/drivers/radeonsi/si_query.c | 2 +- src/gallium/drivers/radeonsi/si_state_draw.c | 2 +- 7 files changed, 22 insertions(+), 15 deletions(-)
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h index d20b5484223..b3321ea3a77 100644 --- a/src/amd/common/sid.h +++ b/src/amd/common/sid.h @@ -153,28 +153,35 @@ #define R_3F2_CONTROL 0x3F2 #define S_3F2_IB_SIZE(x) (((unsigned)(x) & 0xfffff) << 0) #define G_3F2_IB_SIZE(x) (((unsigned)(x) >> 0) & 0xfffff) #define S_3F2_CHAIN(x) (((unsigned)(x) & 0x1) << 20) #define G_3F2_CHAIN(x) (((unsigned)(x) >> 20) & 0x1) #define S_3F2_VALID(x) (((unsigned)(x) & 0x1) << 23) #define PKT3_COPY_DATA 0x40 #define COPY_DATA_SRC_SEL(x) ((x) & 0xf) #define COPY_DATA_REG 0 -#define COPY_DATA_MEM 1 +#define COPY_DATA_SRC_MEM 1 /* only valid as source */ +#define COPY_DATA_TC_L2 2 +#define COPY_DATA_GDS 3 #define COPY_DATA_PERF 4 #define COPY_DATA_IMM 5 #define COPY_DATA_TIMESTAMP 9 #define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8) -#define COPY_DATA_MEM_ASYNC 5 +#define COPY_DATA_DST_MEM_GRBM 1 /* sync across GRBM, deprecated */ +#define COPY_DATA_TC_L2 2 +#define COPY_DATA_GDS 3 +#define COPY_DATA_PERF 4 +#define COPY_DATA_DST_MEM 5 #define COPY_DATA_COUNT_SEL (1 << 16) #define COPY_DATA_WR_CONFIRM (1 << 20) +#define COPY_DATA_ENGINE_PFP (1 << 30) #define PKT3_PFP_SYNC_ME 0x42 #define PKT3_SURFACE_SYNC 0x43 /* deprecated on CIK, use ACQUIRE_MEM */ #define PKT3_ME_INITIALIZE 0x44 /* not on CIK */ #define PKT3_COND_WRITE 0x45 #define PKT3_EVENT_WRITE 0x46 #define PKT3_EVENT_WRITE_EOP 0x47 /* not on GFX9 */ #define EOP_INT_SEL(x) ((x) << 24) #define EOP_INT_SEL_NONE 0 #define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3 #define EOP_DATA_SEL(x) ((x) << 29) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index d492456d6b8..339704990e2 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1290,21 +1290,21 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { ++reg_count; } else { ++reg_offset; va += 4; } if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ++reg_count; radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2); radeon_emit(cs, 0); radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(cs, 0); } @@ -1420,21 +1420,21 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, uint64_t va = radv_buffer_get_va(image->bo); va += image->offset + image->clear_value_offset; if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image)) return; uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c; radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_COUNT_SEL); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, reg >> 2); radeon_emit(cs, 0); radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); radeon_emit(cs, 0); } @@ -3734,21 +3734,21 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, if (info->indirect) { uint64_t va = radv_buffer_get_va(info->indirect->bo); va += info->indirect->offset + info->indirect_offset; radv_cs_add_buffer(ws, cs, info->indirect->bo); if (loc->sgpr_idx != -1) { for (unsigned i = 0; i < 3; ++i) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG)); radeon_emit(cs, (va + 4 * i)); radeon_emit(cs, (va + 4 * i) >> 32); radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i); radeon_emit(cs, 0); } } if (radv_cmd_buffer_uses_mec(cmd_buffer)) { diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index d607d24cfc6..3af56266cea 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1046,31 +1046,31 @@ void radv_CmdCopyQueryPoolResults( radeon_emit(cs, local_src_va); radeon_emit(cs, local_src_va >> 32); radeon_emit(cs, TIMESTAMP_NOT_READY >> 32); radeon_emit(cs, 0xffffffff); radeon_emit(cs, 4); } if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { uint64_t avail_dest_va = dest_va + elem_size; radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_MEM)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM)); radeon_emit(cs, local_src_va); radeon_emit(cs, local_src_va >> 32); radeon_emit(cs, avail_dest_va); radeon_emit(cs, avail_dest_va >> 32); } radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_MEM) | + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) | ((flags & VK_QUERY_RESULT_64_BIT) ? COPY_DATA_COUNT_SEL : 0)); radeon_emit(cs, local_src_va); radeon_emit(cs, local_src_va >> 32); radeon_emit(cs, dest_va); radeon_emit(cs, dest_va >> 32); assert(cs->cdw <= cdw_max); } break; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e0c6902fec4..cbcd8e79c7b 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -727,21 +727,21 @@ static void si_setup_tgsi_user_data(struct si_context *sctx, uint64_t base_va = r600_resource(info->indirect)->gpu_address; uint64_t va = base_va + info->indirect_offset; int i; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, r600_resource(info->indirect), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); for (i = 0; i < 3; ++i) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG)); radeon_emit(cs, (va + 4 * i)); radeon_emit(cs, (va + 4 * i) >> 32); radeon_emit(cs, (grid_size_reg >> 2) + i); radeon_emit(cs, 0); } } } else { if (program->uses_grid_size) { radeon_set_sh_reg_seq(cs, grid_size_reg, 3); diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index c4f6e164fb5..de71572c8aa 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -552,21 +552,21 @@ static void si_pc_emit_select(struct si_context *sctx, static void si_pc_emit_start(struct si_context *sctx, struct r600_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | - COPY_DATA_DST_SEL(COPY_DATA_MEM)); + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM)); radeon_emit(cs, 1); /* immediate */ radeon_emit(cs, 0); /* unused */ radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0)); radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, @@ -609,34 +609,34 @@ static void si_pc_emit_read(struct si_context *sctx, if (!(regs->layout & SI_PC_FAKE)) { if (regs->layout & SI_PC_REG_REVERSE) reg_delta = -reg_delta; for (idx = 0; idx < count; ++idx) { if (regs->counters) reg = regs->counters[idx]; radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | - COPY_DATA_DST_SEL(COPY_DATA_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) | COPY_DATA_COUNT_SEL); /* 64 bits */ radeon_emit(cs, reg >> 2); radeon_emit(cs, 0); /* unused */ radeon_emit(cs, va); radeon_emit(cs, va >> 32); va += sizeof(uint64_t); reg += reg_delta; } } else { for (idx = 0; idx < count; ++idx) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | - COPY_DATA_DST_SEL(COPY_DATA_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) | COPY_DATA_COUNT_SEL); radeon_emit(cs, 0); /* immediate */ radeon_emit(cs, 0); radeon_emit(cs, va); radeon_emit(cs, va >> 32); va += sizeof(uint64_t); } } } diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 80e84c23937..bdd7e2c060c 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -792,21 +792,21 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) emit_sample_streamout(cs, va + 32 * stream, stream); break; case PIPE_QUERY_TIME_ELAPSED: /* Write the timestamp from the CP not waiting for * outstanding draws (top-of-pipe). */ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | - COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC)); + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM)); radeon_emit(cs, 0); radeon_emit(cs, 0); radeon_emit(cs, va); radeon_emit(cs, va >> 32); break; case PIPE_QUERY_PIPELINE_STATISTICS: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index b1d7437edb9..fceb9debc47 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -664,21 +664,21 @@ static void si_emit_draw_packets(struct si_context *sctx, if (info->count_from_stream_output) { struct si_streamout_target *t = (struct si_streamout_target*)info->count_from_stream_output; uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset; radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw); radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM); radeon_emit(cs, va); /* src address lo */ radeon_emit(cs, va >> 32); /* src address hi */ radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); radeon_emit(cs, 0); /* unused */ radeon_add_to_buffer_list(sctx, sctx->gfx_cs, t->buf_filled_size, RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE); -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev