On 10/3/18 12:35 AM, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

---
  src/amd/common/sid.h                          |  5 +++++
  src/gallium/drivers/radeonsi/si_fence.c       | 19 ++++++++++---------
  src/gallium/drivers/radeonsi/si_perfcounter.c |  2 ++
  src/gallium/drivers/radeonsi/si_pipe.h        |  2 +-
  src/gallium/drivers/radeonsi/si_query.c       |  9 +++++++--
  src/gallium/drivers/radeonsi/si_state_draw.c  |  8 ++++++--
  6 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index b3321ea3a77..3e36eb2d046 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -174,28 +174,33 @@
  #define                 COPY_DATA_DST_MEM       5
  #define               COPY_DATA_COUNT_SEL             (1 << 16)
  #define               COPY_DATA_WR_CONFIRM            (1 << 20)
  #define               COPY_DATA_ENGINE_PFP            (1 << 30)
  #define PKT3_PFP_SYNC_ME                     0x42
  #define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use 
ACQUIRE_MEM */
  #define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
  #define PKT3_COND_WRITE                        0x45
  #define PKT3_EVENT_WRITE                       0x46
  #define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
+#define         EOP_DST_SEL(x)                         ((x) << 16)
+#define                        EOP_DST_SEL_MEM                 0
+#define                        EOP_DST_SEL_TC_L2               1
  #define         EOP_INT_SEL(x)                          ((x) << 24)
  #define                       EOP_INT_SEL_NONE                        0
  #define                       EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM  3
  #define         EOP_DATA_SEL(x)                         ((x) << 29)
  #define                       EOP_DATA_SEL_DISCARD            0
  #define                       EOP_DATA_SEL_VALUE_32BIT        1
  #define                       EOP_DATA_SEL_VALUE_64BIT        2
  #define                       EOP_DATA_SEL_TIMESTAMP          3
+#define                        EOP_DATA_SEL_GDS                5
+#define                EOP_DATA_GDS(dw_offset, num_dwords)     ((dw_offset) | 
((unsigned)(num_dwords) << 16))
  /* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
   * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
   * DST_SEL=MC. Only CIK chips are affected.
   */
  /* fix CP DMA before uncommenting: */
  /*#define PKT3_EVENT_WRITE_EOS                   0x48*/ /* not on GFX9 */
  #define PKT3_RELEASE_MEM                       0x49 /* GFX9+ [any ring] or 
GFX8 [compute ring only] */
  #define PKT3_ONE_REG_WRITE                     0x57 /* not on CIK */
  #define PKT3_ACQUIRE_MEM                       0x58 /* new for CIK */
  #define PKT3_SET_CONFIG_REG                    0x68
diff --git a/src/gallium/drivers/radeonsi/si_fence.c 
b/src/gallium/drivers/radeonsi/si_fence.c
index abb7057f299..005fd9c1576 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -59,34 +59,32 @@ struct si_multi_fence {
   * \param event               EVENT_TYPE_*
   * \param event_flags Optional cache flush flags (TC)
   * \param data_sel    1 = fence, 3 = timestamp
   * \param buf         Buffer
   * \param va          GPU address
   * \param old_value   Previous fence value (for a bug workaround)
   * \param new_value   Fence value to write for this event.
   */

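You might want to update the documentation comment above too: it still lists only data_sel, so the new dst_sel and int_sel parameters are undocumented.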

  void si_gfx_write_event_eop(struct si_context *ctx,
                            unsigned event, unsigned event_flags,
-                           unsigned data_sel,
+                           unsigned dst_sel, unsigned int_sel, unsigned 
data_sel,
                            struct r600_resource *buf, uint64_t va,
                            uint32_t new_fence, unsigned query_type)
  {
        struct radeon_cmdbuf *cs = ctx->gfx_cs;
        unsigned op = EVENT_TYPE(event) |
-                     EVENT_INDEX(5) |
+                     EVENT_INDEX(event == V_028A90_CS_DONE ||
+                                 event == V_028A90_PS_DONE ? 6 : 5) |
                      event_flags;
-       unsigned sel = EOP_DATA_SEL(data_sel);
-
-       /* Wait for write confirmation before writing data, but don't send
-        * an interrupt. */
-       if (data_sel != EOP_DATA_SEL_DISCARD)
-               sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
+       unsigned sel = EOP_DST_SEL(dst_sel) |
+                      EOP_INT_SEL(int_sel) |
+                      EOP_DATA_SEL(data_sel);
if (ctx->chip_class >= GFX9) {
                /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
                 * counters) must immediately precede every timestamp event to
                 * prevent a GPU hang on GFX9.
                 *
                 * Occlusion queries don't need to do it here, because they
                 * always do ZPASS_DONE before the timestamp.
                 */
                if (ctx->chip_class == GFX9 &&
@@ -268,21 +266,24 @@ static void si_fine_fence_set(struct si_context *ctx,
        if (flags & PIPE_FLUSH_TOP_OF_PIPE) {
                struct radeon_cmdbuf *cs = ctx->gfx_cs;
                radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
                radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
                        S_370_WR_CONFIRM(1) |
                        S_370_ENGINE_SEL(V_370_PFP));
                radeon_emit(cs, fence_va);
                radeon_emit(cs, fence_va >> 32);
                radeon_emit(cs, 0x80000000);
        } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
-               si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+               si_gfx_write_event_eop(ctx,
+                                      V_028A90_BOTTOM_OF_PIPE_TS, 0,
+                                      EOP_DST_SEL_MEM,
+                                      EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
                                       EOP_DATA_SEL_VALUE_32BIT,
                                       NULL, fence_va, 0x80000000,
                                       PIPE_QUERY_GPU_FINISHED);
        } else {
                assert(false);
        }
  }
static boolean si_fence_finish(struct pipe_screen *screen,
                               struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index de71572c8aa..f3ef3d28c8a 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -574,20 +574,22 @@ static void si_pc_emit_start(struct si_context *sctx,
  }
/* Note: The buffer was already added in si_pc_emit_start, so we don't have to
   * do it again in here. */
  static void si_pc_emit_stop(struct si_context *sctx,
                            struct r600_resource *buffer, uint64_t va)
  {
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+                              EOP_DST_SEL_MEM,
+                              EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
                               EOP_DATA_SEL_VALUE_32BIT,
                               buffer, va, 0, SI_NOT_QUERY);
        si_gfx_wait_fence(sctx, va, 0, 0xffffffff);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | 
EVENT_INDEX(0));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 29d7e555a0c..73c54df3a03 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1165,21 +1165,21 @@ void si_sdma_clear_buffer(struct si_context *sctx, 
struct pipe_resource *dst,
  void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
                       struct r600_resource *dst, struct r600_resource *src);
  void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
                     struct pipe_fence_handle **fence);
  void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource 
*dst,
                            uint64_t offset, uint64_t size, unsigned value);
/* si_fence.c */
  void si_gfx_write_event_eop(struct si_context *ctx,
                            unsigned event, unsigned event_flags,
-                           unsigned data_sel,
+                           unsigned dst_sel, unsigned int_sel, unsigned 
data_sel,
                            struct r600_resource *buf, uint64_t va,
                            uint32_t new_fence, unsigned query_type);
  unsigned si_gfx_write_fence_dwords(struct si_screen *screen);
  void si_gfx_wait_fence(struct si_context *ctx,
                       uint64_t va, uint32_t ref, uint32_t mask);
  void si_init_fence_functions(struct si_context *ctx);
  void si_init_screen_fence_functions(struct si_screen *screen);
  struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
                                          struct tc_unflushed_batch_token 
*tc_token);
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index bdd7e2c060c..45c8e146ecf 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -884,21 +884,23 @@ static void si_query_hw_do_emit_stop(struct si_context 
*sctx,
        case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
                va += 16;
                for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream)
                        emit_sample_streamout(cs, va + 32 * stream, stream);
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                va += 8;
                /* fall through */
        case PIPE_QUERY_TIMESTAMP:
                si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS,
-                                      0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
+                                      0, EOP_DST_SEL_MEM,
+                                      EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+                                      EOP_DATA_SEL_TIMESTAMP, NULL, va,
                                       0, query->b.type);
                fence_va = va + 8;
                break;
        case PIPE_QUERY_PIPELINE_STATISTICS: {
                unsigned sample_size = (query->result_size - 8) / 2;
va += sample_size;
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
                radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | 
EVENT_INDEX(2));
                radeon_emit(cs, va);
@@ -906,25 +908,28 @@ static void si_query_hw_do_emit_stop(struct si_context 
*sctx,
fence_va = va + sample_size;
                break;
        }
        default:
                assert(0);
        }
        radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, 
RADEON_USAGE_WRITE,
                                  RADEON_PRIO_QUERY);
- if (fence_va)
+       if (fence_va) {
                si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+                                      EOP_DST_SEL_MEM,
+                                      EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
                                       EOP_DATA_SEL_VALUE_32BIT,
                                       query->buffer.buf, fence_va, 0x80000000,
                                       query->b.type);
+       }
  }
static void si_query_hw_emit_stop(struct si_context *sctx,
                                  struct si_query_hw *query)
  {
        uint64_t va;
if (!query->buffer.buf)
                return; // previous buffer allocation failure
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index fceb9debc47..3d56d8e9ab4 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -911,22 +911,24 @@ void si_emit_cache_flush(struct si_context *sctx)
                                         S_0085F0_CB1_DEST_BASE_ENA(1) |
                                         S_0085F0_CB2_DEST_BASE_ENA(1) |
                                         S_0085F0_CB3_DEST_BASE_ENA(1) |
                                         S_0085F0_CB4_DEST_BASE_ENA(1) |
                                         S_0085F0_CB5_DEST_BASE_ENA(1) |
                                         S_0085F0_CB6_DEST_BASE_ENA(1) |
                                         S_0085F0_CB7_DEST_BASE_ENA(1);
/* Necessary for DCC */
                        if (sctx->chip_class == VI)
-                               si_gfx_write_event_eop(sctx, 
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
-                                                      0, EOP_DATA_SEL_DISCARD, 
NULL,
+                               si_gfx_write_event_eop(sctx,
+                                                      
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
+                                                      0, EOP_DST_SEL_MEM, 
EOP_INT_SEL_NONE,
+                                                      EOP_DATA_SEL_DISCARD, 
NULL,
                                                       0, 0, SI_NOT_QUERY);
                }
                if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
                        cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
                                         S_0085F0_DB_DEST_BASE_ENA(1);
        }
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
                /* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */
                radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -1027,20 +1029,22 @@ void si_emit_cache_flush(struct si_context *sctx)
                                   SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
                                   SI_CONTEXT_INV_VMEM_L1);
                        sctx->num_L2_invalidates++;
                }
/* Do the flush (enqueue the event and wait for it). */
                va = sctx->wait_mem_scratch->gpu_address;
                sctx->wait_mem_number++;
si_gfx_write_event_eop(sctx, cb_db_event, tc_flags,
+                                      EOP_DST_SEL_MEM,
+                                      EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
                                       EOP_DATA_SEL_VALUE_32BIT,
                                       sctx->wait_mem_scratch, va,
                                       sctx->wait_mem_number, SI_NOT_QUERY);
                si_gfx_wait_fence(sctx, va, sctx->wait_mem_number, 0xffffffff);
        }
/* Make sure ME is idle (it executes most packets) before continuing.
         * This prevents read-after-write hazards between PFP and ME.
         */
        if (cp_coher_cntl ||

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to