From: "Jiadong.Zhu" <jiadong....@amd.com>

Trigger MCBP according to the priority of the
software rings and the hw fence signaling
condition.

The muxer records some of the latest locations from
the software ring, which are used to resubmit packages
in preemption scenarios.

v2: update comment style

Signed-off-by: Jiadong.Zhu <jiadong....@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile          |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c       |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c     | 101 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h     |  29 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c     |  12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h     |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 163 ++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  16 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  26 +++
 9 files changed, 351 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 85224bc81ce5..24c5aa19bbf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
        amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
        amdgpu_fw_attestation.o amdgpu_securedisplay.o \
        amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-       amdgpu_sw_ring.o amdgpu_ring_mux.o
+       amdgpu_sw_ring.o amdgpu_ring_mux.o amdgpu_mcbp.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
                }
        }
 
+       amdgpu_ring_ib_begin(ring);
        if (job && ring->funcs->init_cond_exec)
                patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
            ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
                ring->funcs->emit_wave_limit(ring, false);
 
+       amdgpu_ring_ib_end(ring);
        amdgpu_ring_commit(ring);
        return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
new file mode 100644
index 000000000000..2a12101a7699
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <drm/gpu_scheduler.h>
+
+#include "amdgpu.h"
+#include "amdgpu_mcbp.h"
+#include "amdgpu_ring.h"
+
+/*
+ * Trigger MCBP on the real ring and find out whether a resubmit is needed.
+ *
+ * Under mux->lock this calls the real ring's preempt_ib callback, picks the
+ * first mux entry whose ring has hw_prio <= AMDGPU_RING_PRIO_DEFAULT and
+ * processes the fences of that ring. If the ring's fence_drv.last_seq still
+ * differs from fence_drv.sync_seq afterwards, s_resubmit is set and the
+ * resubmit timer is armed.
+ *
+ * Returns 0 on success, or -ENOENT when no low priority ring is registered.
+ */
+int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+       struct amdgpu_mux_entry *e;
+       struct amdgpu_ring *ring = NULL;
+       int i, r = 0;
+
+       DRM_INFO("%s in\n", __func__);
+
+       spin_lock(&mux->lock);
+
+       amdgpu_ring_preempt_ib(mux->real_ring);
+
+       for (i = 0; i < mux->num_ring_entries; i++) {
+               e = &mux->ring_entries[i];
+               if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+                       ring = e->ring;
+                       break;
+               }
+       }
+
+       if (!ring) {
+               DRM_ERROR("cannot find low priority ring\n");
+               /* leave through the unlock path so mux->lock is not leaked */
+               r = -ENOENT;
+               goto out_unlock;
+       }
+
+       amdgpu_fence_process(ring);
+
+       DRM_INFO("after preempted ring_prio(%d) last_seq(%x) sync_seq(%x)\n",
+               ring->hw_prio, atomic_read(&ring->fence_drv.last_seq),
+               ring->fence_drv.sync_seq);
+
+       if (atomic_read(&ring->fence_drv.last_seq) !=
+           ring->fence_drv.sync_seq) {
+               DRM_INFO("schedule resubmit\n");
+               mux->s_resubmit = true;
+               amdgpu_ring_mux_schedule_resubmit(mux);
+       }
+
+out_unlock:
+       spin_unlock(&mux->lock);
+       return r;
+}
+
+
+/*
+ * Scan all muxed rings and report whether a preemption should be triggered.
+ *
+ * A ring counts as pending when its fence_drv.last_seq is below its
+ * fence_drv.sync_seq. The result is 0 as soon as a ring with
+ * hw_prio > AMDGPU_RING_PRIO_DEFAULT is pending. Otherwise it is 1 when at
+ * least one ring with hw_prio <= AMDGPU_RING_PRIO_DEFAULT is pending and no
+ * resubmit is outstanding (mux->s_resubmit is false), else 0.
+ */
+int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
+{
+       bool low_prio_pending = false;
+       int idx, ret;
+
+       for (idx = 0; idx < mux->num_ring_entries; idx++) {
+               struct amdgpu_ring *ring = mux->ring_entries[idx].ring;
+               uint32_t last_seq = atomic_read(&ring->fence_drv.last_seq);
+               uint32_t seq = READ_ONCE(ring->fence_drv.sync_seq);
+
+               DRM_INFO("ring(%p) prio(%d) last_seq(%x) seq(%x)\n",
+                       ring, ring->hw_prio, last_seq, seq);
+
+               /* nothing outstanding on this ring */
+               if (last_seq >= seq)
+                       continue;
+
+               /* the high priority ring is still busy: do not preempt */
+               if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+                       return 0;
+
+               low_prio_pending = true;
+       }
+
+       ret = low_prio_pending && !mux->s_resubmit;
+       DRM_INFO("%s return %d\n", __func__, ret);
+       return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
new file mode 100644
index 000000000000..0033bcba8d03
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_MCBP_H__
+#define __AMDGPU_MCBP_H__
+
+int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux);
+int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 5b70a2c36d81..6d7f8a40e308 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -583,3 +583,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
 
        return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
 }
+
+/* Notify the software ring layer that an IB submission starts; no-op for other rings. */
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+       if (!ring->is_sw_ring)
+               return;
+
+       amdgpu_sw_ring_ib_begin(ring);
+}
+
+/* Notify the software ring layer that an IB submission ends; no-op for other rings. */
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+       if (!ring->is_sw_ring)
+               return;
+
+       amdgpu_sw_ring_ib_end(ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index d3155dc86c07..399037b0d6e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -311,6 +311,9 @@ struct amdgpu_ring {
 #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
 
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib 
*ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index ea4a3c66119a..0c9b639b844e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -20,28 +20,60 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-
+#include <linux/slab.h>
 #include <drm/drm_print.h>
 
 #include "amdgpu_ring_mux.h"
+#include "amdgpu_mcbp.h"
 #include "amdgpu_ring.h"
 
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
 
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
 static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct 
amdgpu_ring *ring,
        u64 s_begin, u64 s_end);
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux);
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t);
 
+/*
+ * Set up a muxer on top of @ring: clear the entry table and the resubmit
+ * flag, create the amdgpu_mux_chunk slab cache and initialize the lock and
+ * the resubmit timer.
+ *
+ * Returns 0 on success or -ENOMEM when the slab cache cannot be created.
+ *
+ * NOTE(review): amdgpu_mux_chunk_slab is a file-scope static that is
+ * assigned on every call - confirm only one muxer is ever initialized.
+ */
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
 {
        mux->real_ring = ring;
+
        memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
        mux->num_ring_entries = 0;
+
+       mux->s_resubmit = false;
+
+       amdgpu_mux_chunk_slab = kmem_cache_create(
+               "amdgpu_mux_chunk", sizeof(struct amdgpu_mux_chunk), 0,
+               SLAB_HWCACHE_ALIGN, NULL);
+       if (!amdgpu_mux_chunk_slab) {
+               DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
+               return -ENOMEM;
+       }
+
        spin_lock_init(&mux->lock);
+
+       /* the timer callback resubmits saved chunks, see amdgpu_mux_resubmit_fallback() */
+       timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
+
        return 0;
 }
 
 void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
 {
+       struct amdgpu_mux_entry *e;
+       struct amdgpu_mux_chunk *chunk, *chunk2;
+       int i;
+
+       for (i = 0; i < mux->num_ring_entries; i++) {
+               e = &mux->ring_entries[i];
+               list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
+                       list_del(&chunk->entry);
+                       kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+               }
+       }
+       kmem_cache_destroy(amdgpu_mux_chunk_slab);
        memset(mux->ring_entries, 0, sizeof(mux->ring_entries));
        mux->num_ring_entries = 0;
 }
@@ -64,6 +96,8 @@ int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, 
struct amdgpu_ring
        e->sw_rptr = 0;
        e->sw_wptr = 0;
 
+       INIT_LIST_HEAD(&e->list);
+
        return 0;
 }
 
@@ -180,3 +214,130 @@ static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux 
*mux, struct amdgpu_ring
 
        return 0;
 }
+
+/* (Re)arm the resubmit timer to expire AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT from now. */
+void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
+{
+       unsigned long expires = jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT;
+
+       mod_timer(&mux->resubmit_timer, expires);
+}
+
+/*
+ * Open a new chunk record for an IB that is about to be written to @ring.
+ *
+ * If a resubmit is pending (mux->s_resubmit) the saved chunks are
+ * resubmitted first. A chunk is then allocated, its start is set to the
+ * ring's current wptr and it is appended to the chunk list of the ring's
+ * mux entry. Errors are logged and the function returns without a chunk.
+ */
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux,
+                             struct amdgpu_ring *ring)
+{
+       struct amdgpu_mux_entry *e;
+       struct amdgpu_mux_chunk *chunk;
+
+       if (mux->s_resubmit)
+               amdgpu_mux_resubmit_chunks(mux);
+
+       e = amdgpu_get_sw_entry(mux, ring);
+       if (!e) {
+               DRM_ERROR("cannot find entry!\n");
+               return;
+       }
+
+       /*
+        * Zero the chunk so that end and sync_seq hold defined values while
+        * the chunk is already on the list but its IB is not finished yet.
+        */
+       chunk = kmem_cache_zalloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
+       if (!chunk) {
+               DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
+               return;
+       }
+
+       chunk->start = ring->wptr;
+       list_add_tail(&chunk->entry, &e->list);
+}
+
+/*
+ * Drop every chunk of @ring whose sync_seq is not above the ring's
+ * fence_drv.last_seq and return it to the chunk slab cache.
+ */
+static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux,
+                                          struct amdgpu_ring *ring)
+{
+       uint32_t last_seq;
+       struct amdgpu_mux_entry *e;
+       struct amdgpu_mux_chunk *chunk, *tmp;
+
+       e = amdgpu_get_sw_entry(mux, ring);
+       if (!e) {
+               DRM_ERROR("cannot find entry!\n");
+               return;
+       }
+
+       last_seq = atomic_read(&ring->fence_drv.last_seq);
+
+       /* _safe variant: entries are unlinked while the list is walked */
+       list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
+               if (chunk->sync_seq <= last_seq) {
+                       list_del(&chunk->entry);
+                       kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+               }
+       }
+}
+
+/*
+ * Close the most recently queued chunk of @ring: record the ring's current
+ * wptr as the chunk end and the ring's fence_drv.sync_seq as its sequence
+ * number, then drop the chunks that are not above fence_drv.last_seq.
+ *
+ * Logs an error and returns when the ring has no mux entry or no chunk is
+ * queued on it.
+ */
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux,
+                           struct amdgpu_ring *ring)
+{
+       struct amdgpu_mux_entry *e;
+       struct amdgpu_mux_chunk *chunk;
+
+       e = amdgpu_get_sw_entry(mux, ring);
+       if (!e) {
+               DRM_ERROR("cannot find entry!\n");
+               return;
+       }
+
+       /*
+        * list_last_entry() never yields NULL; on an empty list it points
+        * into the list head, so emptiness has to be tested explicitly.
+        */
+       if (list_empty(&e->list)) {
+               DRM_ERROR("cannot find chunk!\n");
+               return;
+       }
+
+       chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+       chunk->end = ring->wptr;
+       chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+
+       scan_and_remove_signaled_chunk(mux, ring);
+}
+
+/*
+ * Copy every chunk of the first low priority ring whose sync_seq is above
+ * the ring's fence_drv.last_seq back onto the real ring and commit it, then
+ * stop the resubmit timer and clear s_resubmit.
+ *
+ * The entries and the chunk list are walked under mux->lock. When no low
+ * priority ring is registered, or last_seq already equals sync_seq, the
+ * function only drops the lock and returns.
+ * NOTE(review): on those early exits s_resubmit and the timer are left
+ * untouched, exactly as before - confirm that this is intended.
+ */
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
+{
+       struct amdgpu_mux_entry *e = NULL;
+       struct amdgpu_mux_chunk *chunk;
+       uint32_t seq, last_seq;
+       int i;
+
+       spin_lock(&mux->lock);
+
+       /* find the first low priority entry */
+       for (i = 0; i < mux->num_ring_entries; i++) {
+               if (mux->ring_entries[i].ring->hw_prio <=
+                   AMDGPU_RING_PRIO_DEFAULT) {
+                       e = &mux->ring_entries[i];
+                       break;
+               }
+       }
+
+       if (!e) {
+               DRM_ERROR("%s no low priority ring found\n", __func__);
+               goto out_unlock;
+       }
+
+       last_seq = atomic_read(&e->ring->fence_drv.last_seq);
+       seq = READ_ONCE(e->ring->fence_drv.sync_seq);
+       if (seq == last_seq) {
+               DRM_INFO("skip as fence signaled seq=%x\n", seq);
+               goto out_unlock;
+       }
+       DRM_INFO("begin to copy resubmit chunks\n");
+
+       /* resubmit all the fences between (last_seq, seq] */
+       list_for_each_entry(chunk, &e->list, entry) {
+               if (chunk->sync_seq > last_seq) {
+                       copy_pkt_from_sw_ring(mux, e->ring, chunk->start,
+                                             chunk->end);
+                       amdgpu_ring_commit(mux->real_ring);
+               }
+       }
+       spin_unlock(&mux->lock);
+
+       del_timer(&mux->resubmit_timer);
+       mux->s_resubmit = false;
+       return;
+
+out_unlock:
+       /* every early exit has to release mux->lock as well */
+       spin_unlock(&mux->lock);
+}
+
+/* Resubmit timer callback: resubmit the saved chunks of the muxer owning @t. */
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
+{
+       struct amdgpu_ring_mux *mux;
+
+       /* recover the muxer that embeds this timer */
+       mux = from_timer(mux, t, resubmit_timer);
+
+       DRM_INFO("calling %s\n", __func__);
+       amdgpu_mux_resubmit_chunks(mux);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index d058c43bb063..1d91c235061a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -44,17 +44,27 @@ struct amdgpu_mux_entry {
        u64 sw_cptr;
        u64 sw_rptr;
        u64 sw_wptr;
+
+       struct list_head list;
 };
 
 struct amdgpu_ring_mux {
        struct amdgpu_ring *real_ring;
 
        struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS];
-
        unsigned num_ring_entries;
 
        spinlock_t                      lock;
 
+       /* true while preempted low priority chunks wait to be resubmitted */
+       bool s_resubmit;
+       /* fallback timer whose callback resubmits the saved chunks */
+       struct timer_list               resubmit_timer;
+};
+
+/* One IB recorded on a software ring, kept so that it can be resubmitted. */
+struct amdgpu_mux_chunk {
+       /* link in the owning amdgpu_mux_entry's list */
+       struct list_head entry;
+       /* ring's fence_drv.sync_seq sampled when the IB ended */
+       uint32_t sync_seq;
+       /* software ring wptr sampled when the IB started */
+       u64 start;
+       /* software ring wptr sampled when the IB ended */
+       u64 end;
 };
 
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring 
*ring);
@@ -64,4 +74,8 @@ void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, 
struct amdgpu_ring
 u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct 
amdgpu_ring *ring);
 u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct 
amdgpu_ring *ring);
 
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring 
*ring);
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring 
*ring);
+void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
index 452d0ff37758..143a84c18534 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
@@ -26,6 +26,7 @@
 
 #include "amdgpu_sw_ring.h"
 #include "amdgpu_ring_mux.h"
+#include "amdgpu_mcbp.h"
 
 #define amdgpu_ring_get_gpu_addr(ring, offset)                         \
        (ring->is_mes_queue ?                                           \
@@ -202,3 +203,28 @@ void amdgpu_sw_ring_fini(struct amdgpu_ring *ring)
        ring->adev->rings[ring->idx] = NULL;
 }
 
+/*
+ * Hook run before an IB is written to a software ring.
+ *
+ * Rings with hw_prio <= AMDGPU_RING_PRIO_DEFAULT start a new chunk record in
+ * the muxer. Rings above that priority instead ask amdgpu_mcbp_scan()
+ * whether a preemption is needed and trigger it if so.
+ */
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
+{
+       struct amdgpu_ring_mux *mux = &ring->adev->gfx.muxer;
+
+       BUG_ON(!ring->is_sw_ring);
+
+       if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+               amdgpu_ring_mux_start_ib(mux, ring);
+               return;
+       }
+
+       if (amdgpu_mcbp_scan(mux) > 0)
+               amdgpu_mcbp_trigger_preempt(mux);
+}
+
+/*
+ * Hook run after an IB was written to a software ring: rings with
+ * hw_prio <= AMDGPU_RING_PRIO_DEFAULT close their current chunk record in
+ * the muxer, rings above that priority do nothing.
+ */
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
+{
+       struct amdgpu_ring_mux *mux = &ring->adev->gfx.muxer;
+
+       BUG_ON(!ring->is_sw_ring);
+
+       if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
+               amdgpu_ring_mux_end_ib(mux, ring);
+}
-- 
2.25.1

Reply via email to