 amdgpu/amdgpu_device.c         |  24 --
 amdgpu/amdgpu_gpu_info.c       |  72 +++----
 amdgpu/amdgpu_internal.h       |   4
 amdgpu/amdgpu_vamgr.c          |   6
 autogen.sh                     |  10
 configure.ac                   |   3
 freedreno/Makefile.am          |   1
 freedreno/freedreno_bo.c       |  12 +
 freedreno/freedreno_bo_cache.c |   4
 freedreno/freedreno_device.c   |   3
 freedreno/freedreno_priv.h     |  56 +++++
 freedreno/kgsl/kgsl_device.c   |   2
 freedreno/msm/msm_device.c     |   2
 freedreno/msm/msm_ringbuffer.c |   7
 include/drm/README             |   2
 include/drm/amdgpu_drm.h       | 409 +++++++++++++++++++++++++---------------
 intel/intel_bufmgr_gem.c       |  58 +----
 intel/intel_decode.c           |   3
 tests/amdgpu/basic_tests.c     |  27 ++
 tests/amdgpu/cs_tests.c        |  41 ++--
 tests/amdgpu/vce_tests.c       |  54 ++++-
 tests/drmstat.c                | 419 -----------------------------------------
 22 files changed, 518 insertions(+), 701 deletions(-)
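Several of the Vega10 commits below follow the same userspace pattern: query the ASIC family through amdgpu_query_gpu_info() and, on AMDGPU_FAMILY_AI, widen the picture pitch alignment from 16 to 256 (and adjust the SDMA count fields). The sketch below is not part of the patches; it only illustrates that pattern, assuming the pkg-config libdrm_amdgpu include paths and a fixed /dev/dri/renderD128 render node (real code should enumerate devices instead).

	/*
	 * Minimal sketch: pick the VCE/UVD luma pitch alignment the way the
	 * updated tests below do, based on the reported family id.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdint.h>
	#include <unistd.h>
	#include <amdgpu.h>
	#include <amdgpu_drm.h>

	#define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		uint32_t major, minor, width = 192, height = 288;
		struct amdgpu_gpu_info info = {0};
		amdgpu_device_handle dev;
		unsigned align, luma_pitch, luma_size;
		int fd, r;

		fd = open("/dev/dri/renderD128", O_RDWR); /* assumed node */
		if (fd < 0)
			return 1;

		r = amdgpu_device_initialize(fd, &major, &minor, &dev);
		if (r)
			goto out_close;

		r = amdgpu_query_gpu_info(dev, &info);
		if (r)
			goto out_deinit;

		/* GFX9 swizzle modes want the luma/chroma pitch aligned to 256. */
		align = (info.family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
		luma_pitch = ALIGN(width, align);
		luma_size = luma_pitch * ALIGN(height, 16);

		printf("family %u: luma pitch %u, luma plane %u bytes\n",
		       info.family_id, luma_pitch, luma_size);

	out_deinit:
		amdgpu_device_deinitialize(dev);
	out_close:
		close(fd);
		return r ? 1 : 0;
	}

The same family_id check drives the SDMA helpers in basic_tests.c below, which switch the count field to sdma_write_length - 1 on AMDGPU_FAMILY_AI.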
New commits: commit 8d61a9a923c1ced974180609611ef615034fd484 Author: Marek Olšák <marek.ol...@amd.com> Date: Wed Mar 29 20:06:22 2017 +0200 configure.ac: bump version for release diff --git a/configure.ac b/configure.ac index 2e50d3e..6a60ffc 100644 --- a/configure.ac +++ b/configure.ac @@ -20,7 +20,7 @@ AC_PREREQ([2.63]) AC_INIT([libdrm], - [2.4.75], + [2.4.76], [https://bugs.freedesktop.org/enter_bug.cgi?product=DRI], [libdrm]) commit c7b5aaeb1f77a53a46c091b1ba999a76baa6c3fb Author: Leo Liu <leo....@amd.com> Date: Thu Mar 23 10:43:40 2017 -0400 amdgpu_drm: add AMDGPU_HW_IP_UVD_ENC Signed-off-by: Leo Liu <leo....@amd.com> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> Signed-off-by: Marek Olšák <marek.ol...@amd.com> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index 1e25a87..fa56499 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -388,7 +388,8 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_DMA 2 #define AMDGPU_HW_IP_UVD 3 #define AMDGPU_HW_IP_VCE 4 -#define AMDGPU_HW_IP_NUM 5 +#define AMDGPU_HW_IP_UVD_ENC 5 +#define AMDGPU_HW_IP_NUM 6 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 commit f684bb109fcdb85faa1b212bb6efcc352d8cbcdc Author: Christian König <christian.koe...@amd.com> Date: Mon Mar 27 15:44:14 2017 +0200 amdgpu: stop reading CC_RB_BACKEND_DISABLE on Vega10 Follow up to 'drm: don't access deprecated register on Vega10'. The same information is available in enabled_rb_pipes_mask and reading that register can cause GRBM bus problems. Signed-off-by: Christian König <christian.koe...@amd.com> Signed-off-by: Marek Olšák <marek.ol...@amd.com> diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c index cd31e1b..c5f5f6f 100644 --- a/amdgpu/amdgpu_gpu_info.c +++ b/amdgpu/amdgpu_gpu_info.c @@ -169,20 +169,20 @@ drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev) dev->info.vce_harvest_config = dev->dev_info.vce_harvest_config; dev->info.pci_rev_id = dev->dev_info.pci_rev; - for (i = 0; i < (int)dev->info.num_shader_engines; i++) { - unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) | - (AMDGPU_INFO_MMR_SH_INDEX_MASK << - AMDGPU_INFO_MMR_SH_INDEX_SHIFT); + if (dev->info.family_id < AMDGPU_FAMILY_AI) { + for (i = 0; i < (int)dev->info.num_shader_engines; i++) { + unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) | + (AMDGPU_INFO_MMR_SH_INDEX_MASK << + AMDGPU_INFO_MMR_SH_INDEX_SHIFT); - r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0, - &dev->info.backend_disable[i]); - if (r) - return r; - /* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */ - dev->info.backend_disable[i] = - (dev->info.backend_disable[i] >> 16) & 0xff; + r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0, + &dev->info.backend_disable[i]); + if (r) + return r; + /* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */ + dev->info.backend_disable[i] = + (dev->info.backend_disable[i] >> 16) & 0xff; - if (dev->info.family_id < AMDGPU_FAMILY_AI) { r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0, &dev->info.pa_sc_raster_cfg[i]); if (r) commit a784c38af77714b9e878a7ff97ba18553697304c Author: Junwei Zhang <jerry.zh...@amd.com> Date: Tue Dec 20 09:52:32 2016 +0800 tests/amdgpu: add Polaris12 support for cs test Signed-off-by: Junwei Zhang <jerry.zh...@amd.com> Reviewed-by: Ken Wang <qingqing.w...@amd.com> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/tests/amdgpu/cs_tests.c b/tests/amdgpu/cs_tests.c index 0885d97..342815d 100644 --- a/tests/amdgpu/cs_tests.c +++ 
b/tests/amdgpu/cs_tests.c @@ -216,7 +216,8 @@ static void amdgpu_cs_uvd_create(void) ((uint8_t*)msg)[0x10] = 7; /* chip beyond polaris 10/11 */ if ((family_id == AMDGPU_FAMILY_AI) || - (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) { + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A || + chip_id == chip_rev+0x64)) { /* dpb size */ ((uint8_t*)msg)[0x28] = 0x00; ((uint8_t*)msg)[0x29] = 0x94; @@ -296,7 +297,8 @@ static void amdgpu_cs_uvd_decode(void) ptr[0x99] = 0x02; /* chip beyond polaris10/11 */ if ((family_id == AMDGPU_FAMILY_AI) || - (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) { + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A || + chip_id == chip_rev+0x64)) { /* dpb size */ ptr[0x24] = 0x00; ptr[0x25] = 0x94; @@ -341,7 +343,8 @@ static void amdgpu_cs_uvd_decode(void) if (family_id >= AMDGPU_FAMILY_VI) { if ((family_id == AMDGPU_FAMILY_AI) || - (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) { + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A || + chip_id == chip_rev+0x64)) { ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024); } } @@ -358,7 +361,8 @@ static void amdgpu_cs_uvd_decode(void) if (family_id >= AMDGPU_FAMILY_VI) { uvd_cmd(it_addr, 0x204, &i); if ((family_id == AMDGPU_FAMILY_AI) || - (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A || + chip_id == chip_rev+0x64)) uvd_cmd(ctx_addr, 0x206, &i); } commit f810e31bcf686a156b7b5be6298cd52247a98189 Author: Leo Liu <leo....@amd.com> Date: Tue Dec 13 15:24:05 2016 -0500 tests/amdgpu: add vce unit test support for vega10 swizzle mode needs reference and input picture luma and chroma pitch aligned with 256 Signed-off-by: Leo Liu <leo....@amd.com> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/tests/amdgpu/vce_tests.c b/tests/amdgpu/vce_tests.c index de63aa1..b03807b 100644 --- a/tests/amdgpu/vce_tests.c +++ b/tests/amdgpu/vce_tests.c @@ -234,6 +234,7 @@ static void free_resource(struct amdgpu_vce_bo *vce_bo) static void amdgpu_cs_vce_create(void) { + unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; int len, r; enc.width = vce_create[6]; @@ -250,6 +251,8 @@ static void amdgpu_cs_vce_create(void) memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); len += sizeof(vce_taskinfo) / 4; memcpy((ib_cpu + len), vce_create, sizeof(vce_create)); + ib_cpu[len + 8] = ALIGN(enc.width, align); + ib_cpu[len + 9] = ALIGN(enc.width, align); len += sizeof(vce_create) / 4; memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); ib_cpu[len + 2] = enc.fb[0].addr >> 32; @@ -291,10 +294,12 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc) { uint64_t luma_offset, chroma_offset; - int len = 0, r; + unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 
256 : 16; + unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16); + int len = 0, i, r; luma_offset = enc->vbuf.addr; - chroma_offset = luma_offset + enc->width * enc->height; + chroma_offset = luma_offset + luma_size; memcpy((ib_cpu + len), vce_session, sizeof(vce_session)); len += sizeof(vce_session) / 4; @@ -309,6 +314,10 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc) ib_cpu[len + 3] = enc->cpb.addr; len += sizeof(vce_context_buffer) / 4; memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer)); + for (i = 0; i < 8; ++i) + ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2); + for (i = 0; i < 8; ++i) + ib_cpu[len + 10 + i] = luma_size * 1.5; len += sizeof(vce_aux_buffer) / 4; memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); ib_cpu[len + 2] = enc->fb[0].addr >> 32; @@ -319,8 +328,10 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc) ib_cpu[len + 10] = luma_offset; ib_cpu[len + 11] = chroma_offset >> 32; ib_cpu[len + 12] = chroma_offset; - ib_cpu[len + 73] = 0x7800; - ib_cpu[len + 74] = 0x7800 + 0x5000; + ib_cpu[len + 14] = ALIGN(enc->width, align); + ib_cpu[len + 15] = ALIGN(enc->width, align); + ib_cpu[len + 73] = luma_size * 1.5; + ib_cpu[len + 74] = luma_size * 2.5; len += sizeof(vce_encode) / 4; enc->ib_len = len; if (!enc->two_instance) { @@ -332,11 +343,13 @@ static void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc) static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc) { uint64_t luma_offset, chroma_offset; - int len, r; + int len, i, r; + unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; + unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16); len = (enc->two_instance) ? enc->ib_len : 0; luma_offset = enc->vbuf.addr; - chroma_offset = luma_offset + enc->width * enc->height; + chroma_offset = luma_offset + luma_size; if (!enc->two_instance) { memcpy((ib_cpu + len), vce_session, sizeof(vce_session)); @@ -353,6 +366,10 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc) ib_cpu[len + 3] = enc->cpb.addr; len += sizeof(vce_context_buffer) / 4; memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer)); + for (i = 0; i < 8; ++i) + ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2); + for (i = 0; i < 8; ++i) + ib_cpu[len + 10 + i] = luma_size * 1.5; len += sizeof(vce_aux_buffer) / 4; memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); ib_cpu[len + 2] = enc->fb[1].addr >> 32; @@ -364,15 +381,17 @@ static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc) ib_cpu[len + 10] = luma_offset; ib_cpu[len + 11] = chroma_offset >> 32; ib_cpu[len + 12] = chroma_offset; + ib_cpu[len + 14] = ALIGN(enc->width, align); + ib_cpu[len + 15] = ALIGN(enc->width, align); ib_cpu[len + 18] = 0; ib_cpu[len + 19] = 0; ib_cpu[len + 56] = 3; ib_cpu[len + 57] = 0; ib_cpu[len + 58] = 0; - ib_cpu[len + 59] = 0x7800; - ib_cpu[len + 60] = 0x7800 + 0x5000; + ib_cpu[len + 59] = luma_size * 1.5; + ib_cpu[len + 60] = luma_size * 2.5; ib_cpu[len + 73] = 0; - ib_cpu[len + 74] = 0x5000; + ib_cpu[len + 74] = luma_size; ib_cpu[len + 81] = 1; ib_cpu[len + 82] = 1; len += sizeof(vce_encode) / 4; @@ -408,9 +427,10 @@ static void check_result(struct amdgpu_vce_encode *enc) static void amdgpu_cs_vce_encode(void) { uint32_t vbuf_size, bs_size = 0x154000, cpb_size; - int r; + unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 
256 : 16; + int i, r; - vbuf_size = enc.width * enc.height * 1.5; + vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5; cpb_size = vbuf_size * 10; num_resources = 0; alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT); @@ -429,7 +449,17 @@ static void amdgpu_cs_vce_encode(void) r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr); CU_ASSERT_EQUAL(r, 0); - memcpy(enc.vbuf.ptr, frame, sizeof(frame)); + + memset(enc.vbuf.ptr, 0, vbuf_size); + for (i = 0; i < enc.height; ++i) { + memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width); + enc.vbuf.ptr += ALIGN(enc.width, align); + } + for (i = 0; i < enc.height / 2; ++i) { + memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width); + enc.vbuf.ptr += ALIGN(enc.width, align); + } + r = amdgpu_bo_cpu_unmap(enc.vbuf.handle); CU_ASSERT_EQUAL(r, 0); commit 5a44f9e6c6a460a5ea0b698fb64d02b359927999 Author: Leo Liu <leo....@amd.com> Date: Mon Dec 5 11:18:09 2016 -0500 tests/amdgpu: add uvd unit test support for vega10 Signed-off-by: Leo Liu <leo....@amd.com> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/tests/amdgpu/cs_tests.c b/tests/amdgpu/cs_tests.c index 82c55aa..0885d97 100644 --- a/tests/amdgpu/cs_tests.c +++ b/tests/amdgpu/cs_tests.c @@ -175,11 +175,11 @@ static int submit(unsigned ndw, unsigned ip) static void uvd_cmd(uint64_t addr, unsigned cmd, int *idx) { - ib_cpu[(*idx)++] = 0x3BC4; + ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC4 : 0x81C4; ib_cpu[(*idx)++] = addr; - ib_cpu[(*idx)++] = 0x3BC5; + ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC5 : 0x81C5; ib_cpu[(*idx)++] = addr >> 32; - ib_cpu[(*idx)++] = 0x3BC3; + ib_cpu[(*idx)++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC3 : 0x81C3; ib_cpu[(*idx)++] = cmd << 1; } @@ -211,10 +211,12 @@ static void amdgpu_cs_uvd_create(void) CU_ASSERT_EQUAL(r, 0); memcpy(msg, uvd_create_msg, sizeof(uvd_create_msg)); + if (family_id >= AMDGPU_FAMILY_VI) { ((uint8_t*)msg)[0x10] = 7; - /* chip polaris 10/11 */ - if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) { + /* chip beyond polaris 10/11 */ + if ((family_id == AMDGPU_FAMILY_AI) || + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) { /* dpb size */ ((uint8_t*)msg)[0x28] = 0x00; ((uint8_t*)msg)[0x29] = 0x94; @@ -287,13 +289,15 @@ static void amdgpu_cs_uvd_decode(void) CU_ASSERT_EQUAL(r, 0); memcpy(ptr, uvd_decode_msg, sizeof(uvd_create_msg)); + if (family_id >= AMDGPU_FAMILY_VI) { ptr[0x10] = 7; ptr[0x98] = 0x00; ptr[0x99] = 0x02; - /* chip polaris10/11 */ - if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) { - /*dpb size */ + /* chip beyond polaris10/11 */ + if ((family_id == AMDGPU_FAMILY_AI) || + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) { + /* dpb size */ ptr[0x24] = 0x00; ptr[0x25] = 0x94; ptr[0x26] = 0x6B; @@ -335,9 +339,11 @@ static void amdgpu_cs_uvd_decode(void) bs_addr = fb_addr + 4*1024; dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024); - if ((family_id >= AMDGPU_FAMILY_VI) && - (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) { - ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024); + if (family_id >= AMDGPU_FAMILY_VI) { + if ((family_id == AMDGPU_FAMILY_AI) || + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) { + ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024); + } } dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024); @@ -348,12 +354,15 @@ static void amdgpu_cs_uvd_decode(void) uvd_cmd(dt_addr, 0x2, &i); uvd_cmd(fb_addr, 0x3, &i); uvd_cmd(bs_addr, 0x100, &i); + if (family_id >= AMDGPU_FAMILY_VI) { 
uvd_cmd(it_addr, 0x204, &i); - if (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A) + if ((family_id == AMDGPU_FAMILY_AI) || + (chip_id == chip_rev+0x50 || chip_id == chip_rev+0x5A)) uvd_cmd(ctx_addr, 0x206, &i); -} - ib_cpu[i++] = 0x3BC6; + } + + ib_cpu[i++] = (family_id < AMDGPU_FAMILY_AI) ? 0x3BC6 : 0x81C6; ib_cpu[i++] = 0x1; for (; i % 16; ++i) ib_cpu[i] = 0x80000000; commit fee173dc77295c8624291a4336075361d5dafd67 Author: Huang Rui <ray.hu...@amd.com> Date: Wed Nov 9 11:28:45 2016 +0800 tests/amdgpu: fix the count number for vega10 Signed-off-by: Huang Rui <ray.hu...@amd.com> Reviewed-by: Ken Wang <qingqing.w...@amd.com> Reviewed-by: Christian König <christian.koe...@amd.com> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index bfda21b..4dce67e 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -803,12 +803,16 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type) uint32_t *pm4; struct amdgpu_cs_ib_info *ib_info; struct amdgpu_cs_request *ibs_request; + struct amdgpu_gpu_info gpu_info = {0}; uint64_t bo_mc; volatile uint32_t *bo_cpu; int i, j, r, loop; uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; amdgpu_va_handle va_handle; + r = amdgpu_query_gpu_info(device_handle, &gpu_info); + CU_ASSERT_EQUAL(r, 0); + pm4 = calloc(pm4_dw, sizeof(*pm4)); CU_ASSERT_NOT_EQUAL(pm4, NULL); @@ -848,7 +852,10 @@ static void amdgpu_command_submission_write_linear_helper(unsigned ip_type) SDMA_WRITE_SUB_OPCODE_LINEAR, 0); pm4[i++] = 0xffffffff & bo_mc; pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; - pm4[i++] = sdma_write_length; + if (gpu_info.family_id >= AMDGPU_FAMILY_AI) + pm4[i++] = sdma_write_length - 1; + else + pm4[i++] = sdma_write_length; while(j++ < sdma_write_length) pm4[i++] = 0xdeadbeaf; } else if ((ip_type == AMDGPU_HW_IP_GFX) || @@ -904,12 +911,16 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type) uint32_t *pm4; struct amdgpu_cs_ib_info *ib_info; struct amdgpu_cs_request *ibs_request; + struct amdgpu_gpu_info gpu_info = {0}; uint64_t bo_mc; volatile uint32_t *bo_cpu; int i, j, r, loop; uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; amdgpu_va_handle va_handle; + r = amdgpu_query_gpu_info(device_handle, &gpu_info); + CU_ASSERT_EQUAL(r, 0); + pm4 = calloc(pm4_dw, sizeof(*pm4)); CU_ASSERT_NOT_EQUAL(pm4, NULL); @@ -949,7 +960,10 @@ static void amdgpu_command_submission_const_fill_helper(unsigned ip_type) pm4[i++] = 0xffffffff & bo_mc; pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; pm4[i++] = 0xdeadbeaf; - pm4[i++] = sdma_write_length; + if (gpu_info.family_id >= AMDGPU_FAMILY_AI) + pm4[i++] = sdma_write_length - 1; + else + pm4[i++] = sdma_write_length; } else if ((ip_type == AMDGPU_HW_IP_GFX) || (ip_type == AMDGPU_HW_IP_COMPUTE)) { pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); @@ -1007,12 +1021,16 @@ static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) uint32_t *pm4; struct amdgpu_cs_ib_info *ib_info; struct amdgpu_cs_request *ibs_request; + struct amdgpu_gpu_info gpu_info = {0}; uint64_t bo1_mc, bo2_mc; volatile unsigned char *bo1_cpu, *bo2_cpu; int i, j, r, loop1, loop2; uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; amdgpu_va_handle bo1_va_handle, bo2_va_handle; + r = amdgpu_query_gpu_info(device_handle, &gpu_info); + CU_ASSERT_EQUAL(r, 0); + pm4 = calloc(pm4_dw, sizeof(*pm4)); CU_ASSERT_NOT_EQUAL(pm4, NULL); @@ -1064,7 +1082,10 @@ static void 
amdgpu_command_submission_copy_linear_helper(unsigned ip_type) i = j = 0; if (ip_type == AMDGPU_HW_IP_DMA) { pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); - pm4[i++] = sdma_write_length; + if (gpu_info.family_id >= AMDGPU_FAMILY_AI) + pm4[i++] = sdma_write_length - 1; + else + pm4[i++] = sdma_write_length; pm4[i++] = 0; pm4[i++] = 0xffffffff & bo1_mc; pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; commit 99908bfd4ce3132e99aabc96c1ee4946b1246fa5 Author: Huang Rui <ray.hu...@amd.com> Date: Tue Nov 8 14:00:45 2016 +0800 amdgpu: don't read registers not present on Vega10 Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c index 66c7e0e..cd31e1b 100644 --- a/amdgpu/amdgpu_gpu_info.c +++ b/amdgpu/amdgpu_gpu_info.c @@ -182,40 +182,44 @@ drm_private int amdgpu_query_gpu_info_init(amdgpu_device_handle dev) dev->info.backend_disable[i] = (dev->info.backend_disable[i] >> 16) & 0xff; - r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0, - &dev->info.pa_sc_raster_cfg[i]); - if (r) - return r; - - if (dev->info.family_id >= AMDGPU_FAMILY_CI) { - r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0, - &dev->info.pa_sc_raster_cfg1[i]); + if (dev->info.family_id < AMDGPU_FAMILY_AI) { + r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0, + &dev->info.pa_sc_raster_cfg[i]); if (r) return r; + + if (dev->info.family_id >= AMDGPU_FAMILY_CI) { + r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0, + &dev->info.pa_sc_raster_cfg1[i]); + if (r) + return r; + } } } - r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0, - dev->info.gb_tile_mode); + r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0, + &dev->info.gb_addr_cfg); if (r) return r; - if (dev->info.family_id >= AMDGPU_FAMILY_CI) { - r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0, - dev->info.gb_macro_tile_mode); + if (dev->info.family_id < AMDGPU_FAMILY_AI) { + r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0, + dev->info.gb_tile_mode); if (r) return r; - } - r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0, - &dev->info.gb_addr_cfg); - if (r) - return r; + if (dev->info.family_id >= AMDGPU_FAMILY_CI) { + r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0, + dev->info.gb_macro_tile_mode); + if (r) + return r; + } - r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0, - &dev->info.mc_arb_ramcfg); - if (r) - return r; + r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0, + &dev->info.mc_arb_ramcfg); + if (r) + return r; + } dev->info.cu_active_number = dev->dev_info.cu_active_number; dev->info.cu_ao_mask = dev->dev_info.cu_ao_mask; commit c34b28ae9bac7a20e60482a2bf72f16ad5e28c67 Author: Marek Olšák <marek.ol...@amd.com> Date: Tue Mar 21 20:14:45 2017 +0100 amdgpu: update amdgpu_drm.h for Vega10 Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index 5797283..1e25a87 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -209,6 +209,7 @@ struct drm_amdgpu_gem_userptr { __u32 handle; }; +/* SI-CI-VI: */ /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ #define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 #define AMDGPU_TILING_ARRAY_MODE_MASK 0xf @@ -227,10 +228,14 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_NUM_BANKS_SHIFT 21 #define AMDGPU_TILING_NUM_BANKS_MASK 0x3 +/* GFX9 and later: */ +#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_SWIZZLE_MODE_MASK 
0x1f + #define AMDGPU_TILING_SET(field, value) \ - (((value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) + (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) #define AMDGPU_TILING_GET(value, field) \ - (((value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) + (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) #define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 #define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 @@ -755,6 +760,7 @@ struct drm_amdgpu_info_vce_clock_table { #define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ #define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ #define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ +#define AMDGPU_FAMILY_AI 141 /* Vega10 */ #if defined(__cplusplus) } commit 3dc002df3e5607a3ae0a194b35e1f2fb2cd36697 Author: Marek Olšák <marek.ol...@amd.com> Date: Tue Mar 21 20:31:53 2017 +0100 amdgpu: sync amdgpu_drm.h with kernel 4.11-rc2 Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index d8f2497..5797283 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -50,6 +50,7 @@ extern "C" { #define DRM_AMDGPU_WAIT_CS 0x09 #define DRM_AMDGPU_GEM_OP 0x10 #define DRM_AMDGPU_GEM_USERPTR 0x11 +#define DRM_AMDGPU_WAIT_FENCES 0x12 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -63,6 +64,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs) #define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) +#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -79,22 +81,26 @@ extern "C" { #define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) /* Flag that the memory should be in VRAM and cleared */ #define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) +/* Flag that create shadow bo(GTT) while allocating vram bo */ +#define AMDGPU_GEM_CREATE_SHADOW (1 << 4) +/* Flag that allocating the BO should use linear VRAM */ +#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) struct drm_amdgpu_gem_create_in { /** the requested memory size */ - uint64_t bo_size; + __u64 bo_size; /** physical start_addr alignment in bytes for some HW requirements */ - uint64_t alignment; + __u64 alignment; /** the requested memory domains */ - uint64_t domains; + __u64 domains; /** allocation flags */ - uint64_t domain_flags; + __u64 domain_flags; }; struct drm_amdgpu_gem_create_out { /** returned GEM object handle */ - uint32_t handle; - uint32_t _pad; + __u32 handle; + __u32 _pad; }; union drm_amdgpu_gem_create { @@ -111,28 +117,28 @@ union drm_amdgpu_gem_create { struct drm_amdgpu_bo_list_in { /** Type of operation */ - uint32_t operation; + __u32 operation; /** Handle of list or 0 if we want to create one */ - uint32_t list_handle; + __u32 list_handle; /** Number of BOs in list */ - uint32_t bo_number; + __u32 bo_number; /** Size of each element describing BO */ - uint32_t bo_info_size; + __u32 bo_info_size; /** Pointer to array describing BOs */ - uint64_t bo_info_ptr; + __u64 bo_info_ptr; }; 
struct drm_amdgpu_bo_list_entry { /** Handle of BO */ - uint32_t bo_handle; + __u32 bo_handle; /** New (if specified) BO priority to be used during migration */ - uint32_t bo_priority; + __u32 bo_priority; }; struct drm_amdgpu_bo_list_out { /** Handle of resource list */ - uint32_t list_handle; - uint32_t _pad; + __u32 list_handle; + __u32 _pad; }; union drm_amdgpu_bo_list { @@ -156,26 +162,26 @@ union drm_amdgpu_bo_list { struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ - uint32_t op; + __u32 op; /** For future use, no flags defined so far */ - uint32_t flags; - uint32_t ctx_id; - uint32_t _pad; + __u32 flags; + __u32 ctx_id; + __u32 _pad; }; union drm_amdgpu_ctx_out { struct { - uint32_t ctx_id; - uint32_t _pad; + __u32 ctx_id; + __u32 _pad; } alloc; struct { /** For future use, no flags defined so far */ - uint64_t flags; + __u64 flags; /** Number of resets caused by this context so far. */ - uint32_t hangs; + __u32 hangs; /** Reset status since the last call of the ioctl. */ - uint32_t reset_status; + __u32 reset_status; } state; }; @@ -195,12 +201,12 @@ union drm_amdgpu_ctx { #define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) struct drm_amdgpu_gem_userptr { - uint64_t addr; - uint64_t size; + __u64 addr; + __u64 size; /* AMDGPU_GEM_USERPTR_* */ - uint32_t flags; + __u32 flags; /* Resulting GEM handle */ - uint32_t handle; + __u32 handle; }; /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ @@ -232,28 +238,28 @@ struct drm_amdgpu_gem_userptr { /** The same structure is shared for input/output */ struct drm_amdgpu_gem_metadata { /** GEM Object handle */ - uint32_t handle; + __u32 handle; /** Do we want get or set metadata */ - uint32_t op; + __u32 op; struct { /** For future use, no flags defined so far */ - uint64_t flags; + __u64 flags; /** family specific tiling info */ - uint64_t tiling_info; - uint32_t data_size_bytes; - uint32_t data[64]; + __u64 tiling_info; + __u32 data_size_bytes; + __u32 data[64]; } data; }; struct drm_amdgpu_gem_mmap_in { /** the GEM object handle */ - uint32_t handle; - uint32_t _pad; + __u32 handle; + __u32 _pad; }; struct drm_amdgpu_gem_mmap_out { /** mmap offset from the vma offset manager */ - uint64_t addr_ptr; + __u64 addr_ptr; }; union drm_amdgpu_gem_mmap { @@ -263,18 +269,18 @@ union drm_amdgpu_gem_mmap { struct drm_amdgpu_gem_wait_idle_in { /** GEM object handle */ - uint32_t handle; + __u32 handle; /** For future use, no flags defined so far */ - uint32_t flags; + __u32 flags; /** Absolute timeout to wait */ - uint64_t timeout; + __u64 timeout; }; struct drm_amdgpu_gem_wait_idle_out { /** BO status: 0 - BO is idle, 1 - BO is busy */ - uint32_t status; + __u32 status; /** Returned current memory domain */ - uint32_t domain; + __u32 domain; }; union drm_amdgpu_gem_wait_idle { @@ -284,18 +290,18 @@ union drm_amdgpu_gem_wait_idle { struct drm_amdgpu_wait_cs_in { /** Command submission handle */ - uint64_t handle; + __u64 handle; /** Absolute timeout to wait */ - uint64_t timeout; - uint32_t ip_type; - uint32_t ip_instance; - uint32_t ring; - uint32_t ctx_id; + __u64 timeout; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; }; struct drm_amdgpu_wait_cs_out { /** CS status: 0 - CS completed, 1 - CS still busy */ - uint64_t status; + __u64 status; }; union drm_amdgpu_wait_cs { @@ -303,17 +309,43 @@ union drm_amdgpu_wait_cs { struct drm_amdgpu_wait_cs_out out; }; +struct drm_amdgpu_fence { + __u32 ctx_id; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u64 seq_no; +}; + +struct drm_amdgpu_wait_fences_in { + /** This 
points to uint64_t * which points to fences */ + __u64 fences; + __u32 fence_count; + __u32 wait_all; + __u64 timeout_ns; +}; + +struct drm_amdgpu_wait_fences_out { + __u32 status; + __u32 first_signaled; +}; + +union drm_amdgpu_wait_fences { + struct drm_amdgpu_wait_fences_in in; + struct drm_amdgpu_wait_fences_out out; +}; + #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 #define AMDGPU_GEM_OP_SET_PLACEMENT 1 /* Sets or returns a value associated with a buffer. */ struct drm_amdgpu_gem_op { /** GEM object handle */ - uint32_t handle; + __u32 handle; /** AMDGPU_GEM_OP_* */ - uint32_t op; + __u32 op; /** Input or return value */ - uint64_t value; + __u64 value; }; #define AMDGPU_VA_OP_MAP 1 @@ -332,18 +364,18 @@ struct drm_amdgpu_gem_op { struct drm_amdgpu_gem_va { /** GEM object handle */ - uint32_t handle; - uint32_t _pad; + __u32 handle; + __u32 _pad; /** AMDGPU_VA_OP_* */ - uint32_t operation; + __u32 operation; /** AMDGPU_VM_PAGE_* */ - uint32_t flags; + __u32 flags; /** va address to assign . Must be correctly aligned.*/ - uint64_t va_address; + __u64 va_address; /** Specify offset inside of BO to assign. Must be correctly aligned.*/ - uint64_t offset_in_bo; + __u64 offset_in_bo; /** Specify mapping size. Must be correctly aligned. */ - uint64_t map_size; + __u64 map_size; }; #define AMDGPU_HW_IP_GFX 0 @@ -360,24 +392,24 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 struct drm_amdgpu_cs_chunk { - uint32_t chunk_id; - uint32_t length_dw; - uint64_t chunk_data; + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; }; struct drm_amdgpu_cs_in { /** Rendering context id */ - uint32_t ctx_id; + __u32 ctx_id; /** Handle of resource list associated with CS */ - uint32_t bo_list_handle; - uint32_t num_chunks; - uint32_t _pad; - /** this points to uint64_t * which point to cs chunks */ - uint64_t chunks; + __u32 bo_list_handle; + __u32 num_chunks; + __u32 _pad; + /** this points to __u64 * which point to cs chunks */ + __u64 chunks; }; struct drm_amdgpu_cs_out { - uint64_t handle;