Render Target Message payloads for 16-bit values fit in only one register.
From Intel PRM vol07, page 249 "Render Target Messages" / "Message Data Payloads": "The half precision Render Target Write messages have data payloads that can pack a full SIMD16 payload into 1 register instead of two. The half-precision packed format is used for RGBA and Source 0 Alpha, but Source Depth data payload is always supplied in full precision." So when 16-bit data is uploaded to the payload it will use 1 register regardless of whether it is SIMD16 or SIMD8. This change implies that we need to replicate the approach in the copy propagation of the load_payload operations. v2: By default 16-bit sources should be packed (Jason Ekstrand) Include changes in copy_propagation of load_payload (Chema Casanova) --- src/intel/compiler/brw_fs.cpp | 5 ++++- src/intel/compiler/brw_fs_copy_propagation.cpp | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b695508823..b1e548fd93 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3485,7 +3485,10 @@ fs_visitor::lower_load_payload() for (uint8_t i = inst->header_size; i < inst->sources; i++) { if (inst->src[i].file != BAD_FILE) ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]); - dst = offset(dst, ibld, 1); + if (type_sz(inst->src[i].type) == 2) + dst = byte_offset(dst, REG_SIZE); + else + dst = offset(dst, ibld, 1); } inst->remove(block); diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index d4d01d783c..470eaeec4f 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -800,7 +800,7 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, int offset = 0; for (int i = 0; i < inst->sources; i++) { int effective_width = i < inst->header_size ? 
8 : inst->exec_size; - assert(effective_width * type_sz(inst->src[i].type) % REG_SIZE == 0); + assert(effective_width * MAX2(4, type_sz(inst->src[i].type)) % REG_SIZE == 0); const unsigned size_written = effective_width * type_sz(inst->src[i].type); if (inst->src[i].file == VGRF) { @@ -816,7 +816,7 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, ralloc_free(entry); } } - offset += size_written; + offset += type_sz(inst->src[i].type) == 2 ? REG_SIZE : size_written; } } } -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev