Ensure accurate IB package searching and cover
more corner cases for AV1 encoding requests.

Signed-off-by: David (Ming Qiang) Wu <david....@amd.com>
Reviewed-by: Ruijing Dong <ruijing.d...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 81 +++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 22a41766a8c7..8235ff3820ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1726,6 +1726,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, 
struct amdgpu_job *job,
 
 #define RADEON_VCN_ENGINE_TYPE_ENCODE                  (0x00000002)
 #define RADEON_VCN_ENGINE_TYPE_DECODE                  (0x00000003)
+#define RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE            (0x00000004)
 
 #define RADEON_VCN_ENGINE_INFO                         (0x30000001)
 #define RADEON_VCN_ENGINE_INFO_MAX_OFFSET              16
@@ -1733,21 +1734,86 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, 
struct amdgpu_job *job,
 #define RENCODE_ENCODE_STANDARD_AV1                    2
 #define RENCODE_IB_PARAM_SESSION_INIT                  0x00000003
 #define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET       64
+#define RENCODE_IB_ENC_QUE_INSTRUCTION                 (0x32000001)
+#define RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET      64
 
 /* return the offset in ib if id is found, -1 otherwise
  * to speed up the searching we only search upto max_offset
  */
-static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int 
max_offset)
+static int vcn_v4_0_enc_find_ib_param(uint32_t *ptr, int size, uint32_t id, 
int max_offset)
 {
        int i;
 
-       for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i 
+= ib->ptr[i]/4) {
-               if (ib->ptr[i + 1] == id)
+       for (i = 0; i < size && i < max_offset && ptr[i] >= 8; i += ptr[i] / 4) 
{
+               if (ptr[i + 1] == id)
                        return i;
        }
        return -1;
 }
 
+/* Search the buffer referenced by an ENC_QUE_INSTRUCTION package for an AV1 SESSION_INIT
+ * and call vcn_v4_0_limit_sched() if found; returns 0 if nothing is found or an error code.
+ */
+static int vcn_v4_0_enc_queue_msg(struct amdgpu_cs_parser *p,
+                                 struct amdgpu_job *job,
+                                 struct amdgpu_ib *ib)
+{
+       struct ttm_operation_ctx ctx = { false, false };
+       struct amdgpu_bo_va_mapping *map;
+       uint64_t addr, start, end;
+       struct amdgpu_bo *bo;
+       uint32_t data_size, *msg;
+       void *ptr;
+       int i, r;
+
+       i = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, RENCODE_IB_ENC_QUE_INSTRUCTION,
+               RENCODE_IB_ENC_QUE_INSTRUCTION_MAX_OFFSET);
+       /* dwords i + 2 .. i + 4 hold address lo/hi and size; they must lie inside the IB */
+       if (i < 0 || i + 4 >= ib->length_dw || !ib->ptr[i + 4])
+               return 0; /* did not find */
+
+       addr = ((uint64_t)ib->ptr[i + 3]) << 32 | ib->ptr[i + 2];
+       data_size = ib->ptr[i + 4];
+       addr &= AMDGPU_GMC_HOLE_MASK;
+       r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
+       if (r) {
+               DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+               return r;
+       }
+
+       start = map->start * AMDGPU_GPU_PAGE_SIZE;
+       end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
+       if (addr & 0x7) {
+               DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+               return -EINVAL;
+       }
+
+       bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+       amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+       r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       if (r) {
+               DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
+               return r;
+       }
+
+       r = amdgpu_bo_kmap(bo, &ptr);
+       if (r) {
+               DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
+               return r;
+       }
+
+       msg = ptr + addr - start; /* IB with SESSION_INIT */
+       if (data_size > (end - addr) / 4) /* keep the dword search inside the mapping */
+               data_size = (end - addr) / 4;
+       i = vcn_v4_0_enc_find_ib_param(msg, data_size, RENCODE_IB_PARAM_SESSION_INIT,
+               RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
+       if (i >= 0 && i + 2 < data_size && msg[i + 2] == RENCODE_ENCODE_STANDARD_AV1)
+               r = vcn_v4_0_limit_sched(p, job);
+
+       amdgpu_bo_kunmap(bo);
+       return r;
+}
+
 static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
                                           struct amdgpu_job *job,
                                           struct amdgpu_ib *ib)
@@ -1763,12 +1829,13 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct 
amdgpu_cs_parser *p,
                return 0;
 
        /* RADEON_VCN_ENGINE_INFO is at the top of ib block */
-       idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
+       idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw, 
RADEON_VCN_ENGINE_INFO,
                        RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
        if (idx < 0) /* engine info is missing */
                return 0;
 
        val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
+
        if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
                decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx 
+ 6];
 
@@ -1779,10 +1846,12 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct 
amdgpu_cs_parser *p,
                        decode_buffer->msg_buffer_address_lo;
                return vcn_v4_0_dec_msg(p, job, addr);
        } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
-               idx = vcn_v4_0_enc_find_ib_param(ib, 
RENCODE_IB_PARAM_SESSION_INIT,
-                       RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
+               idx = vcn_v4_0_enc_find_ib_param(ib->ptr, ib->length_dw,
+                       RENCODE_IB_PARAM_SESSION_INIT, 
RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
                if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
                        return vcn_v4_0_limit_sched(p, job);
+       } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE_QUEUE) {
+               return vcn_v4_0_enc_queue_msg(p, job, ib);
        }
        return 0;
 }
-- 
2.34.1

Reply via email to