Looks good to me in general, a few nit picks and sugegstions below.

Am 31.08.2016 um 05:49 schrieb Monk Liu:
v1:
for gfx8, use CONTEXT_CONTROL package to dynamically
skip preamble CEIB and other load_xxx command in sequence.

v2:
support GFX7 as well, and bump up version.
remove cntxcntl in compute ring funcs because CPC doesn't
support this packet.

v3: fix reduntant judgement in cntxcntl.

Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851
Signed-off-by: Monk Liu <monk....@amd.com>

Change-Id: I5d24c1bb5c14190ce4adeb6a331ee3d92b3d5c83
Signed-off-by: Monk Liu <monk....@amd.com>

Only one signed of by line is enough and remove the change-ids.

---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  9 +++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 12 ++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 16 +++++++++-------
  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 20 ++++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 30 ++++++++++++++++++++++++++++++
  6 files changed, 82 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1254410..0de5f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -321,6 +321,7 @@ struct amdgpu_ring_funcs {
        void (*begin_use)(struct amdgpu_ring *ring);
        void (*end_use)(struct amdgpu_ring *ring);
        void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+       void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
  };
/*
@@ -965,6 +966,7 @@ struct amdgpu_ctx {
        spinlock_t              ring_lock;
        struct fence            **fences;
        struct amdgpu_ctx_ring  rings[AMDGPU_MAX_RINGS];
+       bool preamble_presented;
  };
struct amdgpu_ctx_mgr {
@@ -1227,8 +1229,13 @@ struct amdgpu_cs_parser {
/* user fence */
        struct amdgpu_bo_list_entry     uf_entry;
+       bool preamble_present; /* True means this command submit involves a 
preamble IB */

We only need this in amdgpu_cs_ib_fill() don't we? See below as well.

  };
+#define PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
+#define PREAMBLE_IB_PRESENT_FIRST      (1 << 1) /* bit set means preamble IB 
is first presented in belonging context */

Why does that makes a difference if it is seen for the first time?

+#define HAVE_CTX_SWITCH                (1 << 2) /* bit set means context 
switch occured */
+
  struct amdgpu_job {
        struct amd_sched_job    base;
        struct amdgpu_device    *adev;
@@ -1237,6 +1244,7 @@ struct amdgpu_job {
        struct amdgpu_sync      sync;
        struct amdgpu_ib        *ibs;
        struct fence            *fence; /* the hw fence */
+       uint32_t                preamble_status;
        uint32_t                num_ibs;
        void                    *owner;
        uint64_t                fence_ctx; /* the fence_context this job uses */
@@ -2264,6 +2272,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
  #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
  #define amdgpu_ring_emit_hdp_invalidate(r) 
(r)->funcs->emit_hdp_invalidate((r))
  #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2d4e005..6d8c050 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -792,6 +792,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
                if (r)
                        return r;
+ if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+                       parser->preamble_present = true;
+
                if (parser->job->ring && parser->job->ring != ring)
                        return -EINVAL;
@@ -930,6 +933,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                return r;
        }
+ if (p->preamble_present) {
+               job->preamble_status |= PREAMBLE_IB_PRESENT;
+               if (!p->ctx->preamble_presented)
+                       job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
+       }
+

Better move this to the end of amdgpu_cs_ib_fill() where we allocate the IBs as well.

        job->owner = p->filp;
        job->fence_ctx = entity->fence_context;
        p->fence = fence_get(&job->base.s_fence->finished);
@@ -940,6 +949,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
        trace_amdgpu_cs_ioctl(job);
        amd_sched_entity_push_job(&job->base);
+ if (p->preamble_present)
+               p->ctx->preamble_presented = true;
+
        return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 56c85e6..44db0ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -55,9 +55,10 @@
   * - 3.3.0 - Add VM support for UVD on supported hardware.
   * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
   * - 3.5.0 - Add support for new UVD_NO_OP register.
+ * - 3.6.0 - UMD doesn't/shouldn't need to use CONTEXT_CONTROL in IB, KMD 
should do it
   */
  #define KMS_DRIVER_MAJOR      3
-#define KMS_DRIVER_MINOR       5
+#define KMS_DRIVER_MINOR       6
  #define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 04263f0..b12b5ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
  {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib *ib = &ibs[0];
-       bool skip_preamble, need_ctx_switch;
+       bool need_ctx_switch;
        unsigned patch_offset = ~0;
        struct amdgpu_vm *vm;
        uint64_t fence_ctx;
+       uint32_t status = 0;
unsigned i;
        int r = 0;
@@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
        /* always set cond_exec_polling to CONTINUE */
        *ring->cond_exe_cpu_addr = 1;
- skip_preamble = ring->current_ctx == fence_ctx;
        need_ctx_switch = ring->current_ctx != fence_ctx;
+       if (job && ring->funcs->emit_cntxcntl) {
+               if (need_ctx_switch)
+                       status |= HAVE_CTX_SWITCH;
+               status |= job->preamble_status;
+               amdgpu_ring_emit_cntxcntl(ring, status);
+       }
+
        for (i = 0; i < num_ibs; ++i) {
                ib = &ibs[i];
-
-               /* drop preamble IBs if we don't have a context switch */
-               if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
-                       continue;
-

Would be nice to keep this functionality for cases where we don't support emit_cntxcntl (e.g. SI?).

                amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
                                    need_ctx_switch);
                need_ctx_switch = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index f055d49..0d5addb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct 
amdgpu_ring *ring,
        amdgpu_ring_write(ring, control);
  }
+static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+       uint32_t dw2 = 0;
+
+       dw2 |= 0x80000000; /* set load_enable otherwise this package is just 
NOPs */
+       if (flags & HAVE_CTX_SWITCH) {
+               /* set load_global_config & load_global_uconfig */
+               dw2 |= 0x8001;
+               /* set load_cs_sh_regs */
+               dw2 |= 0x01000000;
+               /* set load_per_context_state & load_gfx_sh_regs */
+               dw2 |= 0x10002;

Better define some constants for those.

Regards,
Christian.

+       }
+
+       amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+       amdgpu_ring_write(ring, dw2);
+       amdgpu_ring_write(ring, 0);
+}
+
  /**
   * gfx_v7_0_ring_test_ib - basic ring IB test
   *
@@ -4929,6 +4948,7 @@ static const struct amdgpu_ring_funcs 
gfx_v7_0_ring_funcs_gfx = {
        .test_ib = gfx_v7_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
+       .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
  };
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8ba8e42..73f6ffa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6085,6 +6085,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
        amdgpu_ring_write(ring, 0);
  }
+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+       uint32_t dw2 = 0;
+
+       dw2 |= 0x80000000; /* set load_enable otherwise this package is just 
NOPs */
+       if (flags & HAVE_CTX_SWITCH) {
+               /* set load_global_config & load_global_uconfig */
+               dw2 |= 0x8001;
+               /* set load_cs_sh_regs */
+               dw2 |= 0x01000000;
+               /* set load_per_context_state & load_gfx_sh_regs for GFX */
+               dw2 |= 0x10002;
+
+               /* set load_ce_ram if preamble presented */
+               if (PREAMBLE_IB_PRESENT & flags)
+                       dw2 |= 0x10000000;
+       } else {
+               /* still load_ce_ram if this is the first time preamble 
presented
+                * although there is no context switch happens.
+                */
+               if (PREAMBLE_IB_PRESENT_FIRST & flags)
+                       dw2 |= 0x10000000;
+       }
+
+       amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+       amdgpu_ring_write(ring, dw2);
+       amdgpu_ring_write(ring, 0);
+}
+
  static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state 
state)
  {
@@ -6267,6 +6296,7 @@ static const struct amdgpu_ring_funcs 
gfx_v8_0_ring_funcs_gfx = {
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
+       .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
  };
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to