Since commit "Move to a per-IB secure flag (TMZ)",
we've been seeing hangs in GFX. Ray H. pointed out,
by sending a patch, that we need to send FRAME
CONTROL stop/start back-to-back every time the
TMZ flag flips from one submitted IB to the next.
That is, when we transition from TMZ to non-TMZ we
have to send a stop with TMZ followed by a start
with non-TMZ, and similarly when transitioning
from non-TMZ to TMZ.

This patch implements that stop/start sequence,
thus fixing the GFX hang.

Signed-off-by: Luben Tuikov <luben.tui...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 87 +++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 ++--
 4 files changed, 79 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4b2342d11520..16d6df3304d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -216,40 +216,75 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
                amdgpu_ring_emit_cntxcntl(ring, status);
        }
 
-       secure = false;
+       /* Find the first non-preamble IB.
+        */
        for (i = 0; i < num_ibs; ++i) {
                ib = &ibs[i];
 
                /* drop preamble IBs if we don't have a context switch */
-               if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
-                   skip_preamble &&
-                   !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
-                   !amdgpu_mcbp &&
-                   !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble 
CE ib must be inserted anyway */
-                       continue;
-
-               /* If this IB is TMZ, add frame TMZ start packet,
-                * else, turn off TMZ.
-                */
-               if (ib->flags & AMDGPU_IB_FLAGS_SECURE && 
ring->funcs->emit_tmz) {
-                       if (!secure) {
-                               secure = true;
-                               amdgpu_ring_emit_tmz(ring, true);
-                       }
-               } else if (secure) {
+               if (!(ib->flags & AMDGPU_IB_FLAG_PREAMBLE) ||
+                   !skip_preamble ||
+                   (status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) ||
+                   amdgpu_mcbp ||
+                   amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE 
ib must be inserted anyway */
+                       break;
+       }
+       if (i >= num_ibs)
+               goto Done;
+       /* Setup initial TMZiness and send it off.
+        */
+       secure = false;
+       if (job && ring->funcs->emit_frame_cntl) {
+               if (ib->flags & AMDGPU_IB_FLAGS_SECURE)
+                       secure = true;
+               else
                        secure = false;
-                       amdgpu_ring_emit_tmz(ring, false);
-               }
-
-               amdgpu_ring_emit_ib(ring, job, ib, status);
-               status &= ~AMDGPU_HAVE_CTX_SWITCH;
+               amdgpu_ring_emit_frame_cntl(ring, true, secure);
        }
+       amdgpu_ring_emit_ib(ring, job, ib, status);
+       status &= ~AMDGPU_HAVE_CTX_SWITCH;
+       i += 1;
+       /* Send the rest of the IBs.
+        */
+       if (job && ring->funcs->emit_frame_cntl) {
+               for ( ; i < num_ibs; ++i) {
+                       ib = &ibs[i];
+
+                       /* drop preamble IBs if we don't have a context switch 
*/
+                       if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+                           skip_preamble &&
+                           !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
+                           !amdgpu_mcbp &&
+                           !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, 
Preamble CE ib must be inserted anyway */
+                               continue;
+
+                       if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+                               amdgpu_ring_emit_frame_cntl(ring, false, 
secure);
+                               secure = !secure;
+                               amdgpu_ring_emit_frame_cntl(ring, true, secure);
+                       }
 
-       if (secure) {
-               secure = false;
-               amdgpu_ring_emit_tmz(ring, false);
+                       amdgpu_ring_emit_ib(ring, job, ib, status);
+                       status &= ~AMDGPU_HAVE_CTX_SWITCH;
+               }
+               amdgpu_ring_emit_frame_cntl(ring, false, secure);
+       } else {
+               for ( ; i < num_ibs; ++i) {
+                       ib = &ibs[i];
+
+                       /* drop preamble IBs if we don't have a context switch 
*/
+                       if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+                           skip_preamble &&
+                           !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
+                           !amdgpu_mcbp &&
+                           !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, 
Preamble CE ib must be inserted anyway */
+                               continue;
+
+                       amdgpu_ring_emit_ib(ring, job, ib, status);
+                       status &= ~AMDGPU_HAVE_CTX_SWITCH;
+               }
        }
-
+Done:
 #ifdef CONFIG_X86_64
        if (!(adev->flags & AMD_IS_APU))
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 24caff085d00..4d019d6b3eb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
        void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
                                        uint32_t reg0, uint32_t reg1,
                                        uint32_t ref, uint32_t mask);
-       void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+       void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+                               bool secure);
        /* priority functions */
        void (*set_priority) (struct amdgpu_ring *ring,
                              enum drm_sched_priority priority);
@@ -247,7 +248,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), 
(d), (v), (m))
 #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) 
(r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
-#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), 
(b), (s))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 44f00ecea322..3e83ddb64c3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct 
amdgpu_device *adev);
 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device 
*adev);
 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool 
start, bool secure);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t 
queue_mask)
 {
@@ -4724,12 +4724,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct 
amdgpu_ring *ring, bool resume)
                                           sizeof(de_payload) >> 2);
 }
 
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool 
start,
+                                   bool secure)
 {
-       if (amdgpu_is_tmz(ring->adev)) {
-               amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-               amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-       }
+       uint32_t v = secure ? FRAME_TMZ : 0;
+
+       amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+       amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
@@ -5183,7 +5184,7 @@ static const struct amdgpu_ring_funcs 
gfx_v10_0_ring_funcs_gfx = {
        .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
        .preempt_ib = gfx_v10_0_ring_preempt_ib,
-       .emit_tmz = gfx_v10_0_ring_emit_tmz,
+       .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v10_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1c7a16b91686..fbde71224127 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct 
amdgpu_ring *ring)
        amdgpu_ring_write_multiple(ring, (void *)&de_payload, 
sizeof(de_payload) >> 2);
 }
 
-static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+                                  bool secure)
 {
-       if (amdgpu_is_tmz(ring->adev)) {
-               amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-               amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-       }
+       uint32_t v = secure ? FRAME_TMZ : 0;
+
+       amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+       amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
@@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs 
gfx_v9_0_ring_funcs_gfx = {
        .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
-       .emit_tmz = gfx_v9_0_ring_emit_tmz,
+       .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
-- 
2.25.1.362.g51ebf55b93

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to