From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/sid.h                          | 11 +++++++++--
 src/amd/vulkan/radv_cmd_buffer.c              |  6 +++---
 src/amd/vulkan/radv_query.c                   |  8 ++++----
 src/gallium/drivers/radeonsi/si_compute.c     |  2 +-
 src/gallium/drivers/radeonsi/si_perfcounter.c |  6 +++---
 src/gallium/drivers/radeonsi/si_query.c       |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  2 +-
 7 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index d20b5484223..b3321ea3a77 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -153,28 +153,35 @@
 #define   R_3F2_CONTROL                        0x3F2
 #define     S_3F2_IB_SIZE(x)                   (((unsigned)(x) & 0xfffff) << 0)
 #define     G_3F2_IB_SIZE(x)                   (((unsigned)(x) >> 0) & 0xfffff)
 #define     S_3F2_CHAIN(x)                     (((unsigned)(x) & 0x1) << 20)
 #define     G_3F2_CHAIN(x)                     (((unsigned)(x) >> 20) & 0x1)
 #define     S_3F2_VALID(x)                     (((unsigned)(x) & 0x1) << 23)
 
 #define PKT3_COPY_DATA                        0x40
 #define                COPY_DATA_SRC_SEL(x)            ((x) & 0xf)
 #define                        COPY_DATA_REG           0
-#define                        COPY_DATA_MEM           1
+#define                        COPY_DATA_SRC_MEM       1 /* only valid as source */
+#define                 COPY_DATA_TC_L2         2
+#define                 COPY_DATA_GDS           3
 #define                 COPY_DATA_PERF          4
 #define                 COPY_DATA_IMM           5
 #define                 COPY_DATA_TIMESTAMP     9
 #define                COPY_DATA_DST_SEL(x)            (((unsigned)(x) & 0xf) << 8)
-#define                 COPY_DATA_MEM_ASYNC     5
+#define                 COPY_DATA_DST_MEM_GRBM 1 /* sync across GRBM, deprecated */
+#define                 COPY_DATA_TC_L2         2
+#define                 COPY_DATA_GDS           3
+#define                 COPY_DATA_PERF          4
+#define                 COPY_DATA_DST_MEM       5
 #define                COPY_DATA_COUNT_SEL             (1 << 16)
 #define                COPY_DATA_WR_CONFIRM            (1 << 20)
+#define                COPY_DATA_ENGINE_PFP            (1 << 30)
 #define PKT3_PFP_SYNC_ME                      0x42
 #define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use ACQUIRE_MEM */
 #define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
 #define PKT3_COND_WRITE                        0x45
 #define PKT3_EVENT_WRITE                       0x46
 #define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
 #define         EOP_INT_SEL(x)                          ((x) << 24)
 #define                        EOP_INT_SEL_NONE                        0
 #define                        EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM  3
 #define         EOP_DATA_SEL(x)                         ((x) << 29)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index d492456d6b8..339704990e2 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1290,21 +1290,21 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
        if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
                ++reg_count;
        } else {
                ++reg_offset;
                va += 4;
        }
        if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
                ++reg_count;
 
        radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
                        COPY_DATA_DST_SEL(COPY_DATA_REG) |
                        (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
        radeon_emit(cs, va);
        radeon_emit(cs, va >> 32);
        radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2);
        radeon_emit(cs, 0);
 
        radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
        radeon_emit(cs, 0);
 }
@@ -1420,21 +1420,21 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
        uint64_t va = radv_buffer_get_va(image->bo);
 
        va += image->offset + image->clear_value_offset;
 
        if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
                return;
 
        uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
 
        radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
-       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
                        COPY_DATA_DST_SEL(COPY_DATA_REG) |
                        COPY_DATA_COUNT_SEL);
        radeon_emit(cs, va);
        radeon_emit(cs, va >> 32);
        radeon_emit(cs, reg >> 2);
        radeon_emit(cs, 0);
 
        radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
        radeon_emit(cs, 0);
 }
@@ -3734,21 +3734,21 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
        if (info->indirect) {
                uint64_t va = radv_buffer_get_va(info->indirect->bo);
 
                va += info->indirect->offset + info->indirect_offset;
 
                radv_cs_add_buffer(ws, cs, info->indirect->bo);
 
                if (loc->sgpr_idx != -1) {
                        for (unsigned i = 0; i < 3; ++i) {
                                radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-                               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+                               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
                                                COPY_DATA_DST_SEL(COPY_DATA_REG));
                                radeon_emit(cs, (va +  4 * i));
                                radeon_emit(cs, (va + 4 * i) >> 32);
                                radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0
                                                 + loc->sgpr_idx * 4) >> 2) + i);
                                radeon_emit(cs, 0);
                        }
                }
 
                if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index d607d24cfc6..3af56266cea 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1046,31 +1046,31 @@ void radv_CmdCopyQueryPoolResults(
                                radeon_emit(cs, local_src_va);
                                radeon_emit(cs, local_src_va >> 32);
                                radeon_emit(cs, TIMESTAMP_NOT_READY >> 32);
                                radeon_emit(cs, 0xffffffff);
                                radeon_emit(cs, 4);
                        }
                        if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
                                uint64_t avail_dest_va = dest_va + elem_size;
 
                                radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-                               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
-                                               COPY_DATA_DST_SEL(COPY_DATA_MEM));
+                               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+                                               COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM));
                                radeon_emit(cs, local_src_va);
                                radeon_emit(cs, local_src_va >> 32);
                                radeon_emit(cs, avail_dest_va);
                                radeon_emit(cs, avail_dest_va >> 32);
                        }
 
                        radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-                       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
-                                       COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+                       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+                                       COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
                                        ((flags & VK_QUERY_RESULT_64_BIT) ? COPY_DATA_COUNT_SEL : 0));
                        radeon_emit(cs, local_src_va);
                        radeon_emit(cs, local_src_va >> 32);
                        radeon_emit(cs, dest_va);
                        radeon_emit(cs, dest_va >> 32);
 
 
                        assert(cs->cdw <= cdw_max);
                }
                break;
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index e0c6902fec4..cbcd8e79c7b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -727,21 +727,21 @@ static void si_setup_tgsi_user_data(struct si_context *sctx,
                        uint64_t base_va = r600_resource(info->indirect)->gpu_address;
                        uint64_t va = base_va + info->indirect_offset;
                        int i;
 
                        radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
                                         r600_resource(info->indirect),
                                         RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
 
                        for (i = 0; i < 3; ++i) {
                                radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-                               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+                               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
                                                COPY_DATA_DST_SEL(COPY_DATA_REG));
                                radeon_emit(cs, (va + 4 * i));
                                radeon_emit(cs, (va + 4 * i) >> 32);
                                radeon_emit(cs, (grid_size_reg >> 2) + i);
                                radeon_emit(cs, 0);
                        }
                }
        } else {
                if (program->uses_grid_size) {
                        radeon_set_sh_reg_seq(cs, grid_size_reg, 3);
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index c4f6e164fb5..de71572c8aa 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -552,21 +552,21 @@ static void si_pc_emit_select(struct si_context *sctx,
 static void si_pc_emit_start(struct si_context *sctx,
                             struct r600_resource *buffer, uint64_t va)
 {
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
        radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
                                  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 
        radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
        radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
-                       COPY_DATA_DST_SEL(COPY_DATA_MEM));
+                       COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM));
        radeon_emit(cs, 1); /* immediate */
        radeon_emit(cs, 0); /* unused */
        radeon_emit(cs, va);
        radeon_emit(cs, va >> 32);
 
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
@@ -609,34 +609,34 @@ static void si_pc_emit_read(struct si_context *sctx,
        if (!(regs->layout & SI_PC_FAKE)) {
                if (regs->layout & SI_PC_REG_REVERSE)
                        reg_delta = -reg_delta;
 
                for (idx = 0; idx < count; ++idx) {
                        if (regs->counters)
                                reg = regs->counters[idx];
 
                        radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
                        radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
-                                       COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+                                       COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
                                        COPY_DATA_COUNT_SEL); /* 64 bits */
                        radeon_emit(cs, reg >> 2);
                        radeon_emit(cs, 0); /* unused */
                        radeon_emit(cs, va);
                        radeon_emit(cs, va >> 32);
                        va += sizeof(uint64_t);
                        reg += reg_delta;
                }
        } else {
                for (idx = 0; idx < count; ++idx) {
                        radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
                        radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
-                                       COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+                                       COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
                                        COPY_DATA_COUNT_SEL);
                        radeon_emit(cs, 0); /* immediate */
                        radeon_emit(cs, 0);
                        radeon_emit(cs, va);
                        radeon_emit(cs, va >> 32);
                        va += sizeof(uint64_t);
                }
        }
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index 80e84c23937..bdd7e2c060c 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -792,21 +792,21 @@ static void si_query_hw_do_emit_start(struct si_context *sctx,
                for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream)
                        emit_sample_streamout(cs, va + 32 * stream, stream);
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                /* Write the timestamp from the CP not waiting for
                 * outstanding draws (top-of-pipe).
                 */
                radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
                radeon_emit(cs, COPY_DATA_COUNT_SEL |
                                COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
-                               COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
+                               COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
                radeon_emit(cs, 0);
                radeon_emit(cs, 0);
                radeon_emit(cs, va);
                radeon_emit(cs, va >> 32);
                break;
        case PIPE_QUERY_PIPELINE_STATISTICS:
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
                radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
                radeon_emit(cs, va);
                radeon_emit(cs, va >> 32);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b1d7437edb9..fceb9debc47 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -664,21 +664,21 @@ static void si_emit_draw_packets(struct si_context *sctx,
        if (info->count_from_stream_output) {
                struct si_streamout_target *t =
                        (struct si_streamout_target*)info->count_from_stream_output;
                uint64_t va = t->buf_filled_size->gpu_address +
                              t->buf_filled_size_offset;
 
                radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
                                       t->stride_in_dw);
 
                radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
                            COPY_DATA_DST_SEL(COPY_DATA_REG) |
                            COPY_DATA_WR_CONFIRM);
                radeon_emit(cs, va);     /* src address lo */
                radeon_emit(cs, va >> 32); /* src address hi */
                radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
                radeon_emit(cs, 0); /* unused */
 
                radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
                                      t->buf_filled_size, RADEON_USAGE_READ,
                                      RADEON_PRIO_SO_FILLED_SIZE);
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to