Virtualization world switch needs each command submitted to GFX to carry an
extra entry, which uses WRITE_DATA to fill the CSA. This way, CP will save
CE/DE snapshots when preemption occurs and restore them later.

Signed-off-by: Monk Liu <monk....@amd.com>
Signed-off-by: Xiangliang Yu <xiangliang...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 +
 drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    | 39 ++++++++++++++
 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 88 ++++++++++++++++++++++++++++++++
 4 files changed, 131 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index acf48de..cc35255 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -175,6 +175,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
        if (ring->funcs->emit_hdp_flush)
                amdgpu_ring_emit_hdp_flush(ring);
 
+       amdgpu_gfx_ring_emit_meta_data(ring, vm);
+
        /* always set cond_exec_polling to CONTINUE */
        *ring->cond_exe_cpu_addr = 1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index dff1248..d6f57a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -69,4 +69,6 @@ int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);
 /* Context Save Area functions */
 int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
+                                   struct amdgpu_vm *vm);
 #endif
diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h 
b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
index a25e07f..cc3123b 100644
--- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
+++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
@@ -26,9 +26,48 @@
 
 #include "amdgpu.h"
 
+/* context save area structures */
+/*
+ * CE (constant engine) snapshot saved/restored by CP on preemption.
+ * NOTE(review): this layout is consumed by CP firmware — field order and
+ * sizes must not change; confirm against the CP/PM4 specification.
+ */
+struct amdgpu_ce_ib_state {
+       uint32_t        ce_ib_completion_status;
+       uint32_t        ce_const_engine_count;
+       uint32_t        ce_ib_offset_ib1;
+       uint32_t        ce_ib_offset_ib2;
+};
+
+/*
+ * DE (draw engine) snapshot saved/restored by CP on preemption.
+ * gds_backup_addr_lo/hi are the only fields software initializes here
+ * (see amdgpu_gfx_ring_emit_meta_data); the rest are filled by CP.
+ * NOTE(review): firmware-defined layout — do not reorder or resize fields.
+ */
+struct amdgpu_de_ib_state {
+       uint32_t        de_ib_completion_status;
+       uint32_t        de_const_engine_count;
+       uint32_t        de_ib_offset_ib1;
+       uint32_t        de_ib_offset_ib2;
+       uint32_t        preamble_begin_ib1;
+       uint32_t        preamble_begin_ib2;
+       uint32_t        preamble_end_ib1;
+       uint32_t        preamble_end_ib2;
+       uint32_t        draw_indirect_base_lo;
+       uint32_t        draw_indirect_base_hi;
+       uint32_t        disp_indirect_base_lo;
+       uint32_t        disp_indirect_base_hi;
+       uint32_t        gds_backup_addr_lo;
+       uint32_t        gds_backup_addr_hi;
+       uint32_t        index_base_addr_lo;
+       uint32_t        index_base_addr_hi;
+       uint32_t        sample_cntl;
+};
+
+/*
+ * Layout of the per-context meta data area inside the CSA.
+ * Padded to exactly 1024 dwords (4 KiB):
+ *   ce_payload (4) + reserved1 (60) = 64 dwords,
+ *   de_payload (17) + base addr (2) + reserved2 (941) = 960 dwords.
+ */
+struct amdgpu_gfx_meta_data {
+       struct amdgpu_ce_ib_state       ce_payload;
+       uint32_t                        reserved1[60];
+       struct amdgpu_de_ib_state       de_payload;
+       uint32_t                        de_ib_base_addr_lo;
+       uint32_t                        de_ib_base_addr_hi;
+       uint32_t                        reserved2[941];
+};
+
 /* xgpu structures */
 struct amd_xgpu_csa {
        struct amdgpu_bo            *robj;
+       struct amdgpu_ce_ib_state   ce_payload;
+       struct amdgpu_de_ib_state   de_payload;
        uint64_t                    gpu_addr;
        uint64_t                    gds_addr;
        int32_t                     size;
diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c 
b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
index 246a747..6d4246c 100644
--- a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
+++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
@@ -207,3 +207,91 @@ void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, 
struct amdgpu_vm *vm)
        sa = &xgpu->sa;
        xgpu_vm_unmap_csa(adev, vm, sa);
 }
+
+/*
+ * amdgpu_ring_write_multiple() - copy a buffer of dwords onto the ring
+ * @ring: destination ring
+ * @src: source buffer
+ * @count_dw: number of dwords to copy
+ *
+ * Copies in up to two chunks to handle wrap-around of the ring buffer,
+ * then advances wptr and decrements the remaining dword budget.
+ */
+static void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
+                                      void *src, int count_dw)
+{
+       unsigned int chunk1, chunk2;
+
+       if (ring->count_dw < count_dw) {
+               DRM_ERROR("writing more dwords to the ring than expected:%d.\n",
+                         count_dw);
+               return;
+       }
+
+       /* dwords available before the write pointer wraps */
+       chunk1 = ring->ptr_mask + 1 - ring->wptr;
+       chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
+       chunk2 = count_dw - chunk1;
+
+       if (chunk1)
+               memcpy(&ring->ring[ring->wptr], src, chunk1 << 2);
+
+       /* wrapped: copy the remainder to the start of the ring */
+       if (chunk2)
+               memcpy(ring->ring, src + (chunk1 << 2), chunk2 << 2);
+
+       ring->wptr = (ring->wptr + count_dw) & ring->ptr_mask;
+       ring->count_dw -= count_dw;
+}
+
+/*
+ * amdgpu_gfx_ring_emit_meta_data() - emit CE/DE meta data for world switch
+ * @ring: ring to emit on; only GFX rings under virtualization are handled
+ * @vm: VM whose per-VM CSA is targeted, or NULL to use the global CSA
+ *
+ * Emits two WRITE_DATA packets that initialize the ce_payload and de_payload
+ * areas of the Context Save Area, so CP can save CE/DE snapshots there when
+ * preemption occurs and restore them later.
+ */
+void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
+                                   struct amdgpu_vm *vm)
+{
+       struct amdgpu_ce_ib_state *ce_payload;
+       struct amdgpu_de_ib_state *de_payload;
+       struct amd_xgpu_csa *sa = NULL;
+       struct amd_xgpu *xgpu = (struct amd_xgpu *)ring->adev->priv_data;
+       uint64_t csa_addr, gds_addr;
+       int cnt;
+
+       /* no-op on bare metal (no xgpu data) or on non-GFX rings */
+       if (!xgpu || (ring->funcs->type != AMDGPU_RING_TYPE_GFX))
+               return;
+
+       sa = &xgpu->sa;
+
+       /* payloads are emitted zeroed; CP fills them in on preemption */
+       ce_payload = &sa->ce_payload;
+       de_payload = &sa->de_payload;
+       memset(ce_payload, 0, sizeof(*ce_payload));
+       memset(de_payload, 0, sizeof(*de_payload));
+
+       /* payload dwords + 3 control dwords; "+ 4 - 2" matches the PACKET3
+        * count encoding — TODO confirm against the PM4 packet spec */
+       cnt = (sizeof(*ce_payload) >> 2) + 4 - 2;
+       csa_addr = vm ? vm->csa.csa_addr : sa->gpu_addr;
+       gds_addr = vm ? vm->csa.gds_addr : sa->gds_addr;
+       de_payload->gds_backup_addr_lo = lower_32_bits(gds_addr);
+       de_payload->gds_backup_addr_hi = upper_32_bits(gds_addr);
+
+       /* write CE meta data */
+       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(2) |
+                               WRITE_DATA_DST_SEL(8) |
+                               WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
+       amdgpu_ring_write(ring, lower_32_bits(csa_addr +
+                         offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
+       amdgpu_ring_write(ring, upper_32_bits(csa_addr +
+                         offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
+
+       amdgpu_ring_write_multiple(ring, (void *)ce_payload,
+                                        sizeof(*ce_payload) >> 2);
+
+       /* write DE meta data */
+       cnt = (sizeof(*de_payload) >> 2) + 4 - 2;
+
+       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(1) |
+                               WRITE_DATA_DST_SEL(8) |
+                               WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
+       amdgpu_ring_write(ring, lower_32_bits(csa_addr +
+                         offsetof(struct amdgpu_gfx_meta_data, de_payload)));
+       amdgpu_ring_write(ring, upper_32_bits(csa_addr +
+                         offsetof(struct amdgpu_gfx_meta_data, de_payload)));
+
+       amdgpu_ring_write_multiple(ring, (void *)de_payload,
+                                        sizeof(*de_payload) >> 2);
+}
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to