From: Nicolai Hähnle <nicolai.haeh...@amd.com> TGSI's BFI/BFE specifies that the bits and offset are masked by 0x1f. GLSL has no such masking and simply says that the result is undefined when bits or offset are negative or bits + offset > 32.
The GLSL definition is annoying from a driver perspective, because mapping the GLSL intrinsics directly to TGSI means the case bits == 32, offset == 0 is handled incorrectly. The TGSI definition is the one from DirectX and matches AMD GCN. This patch adds the necessary workaround instructions in st_glsl_to_tgsi, which should be correct for all drivers. If there is hardware that implements GLSL-style bitfield operations natively, we could add extra opcodes and a pipe cap so that the corresponding drivers don't lose performance. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f376462..5f49612 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2154,26 +2154,36 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]); break; case ir_triop_csel: if (this->ctx->Const.NativeIntegers) emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]); else { op[0].negate = ~op[0].negate; emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]); } break; - case ir_triop_bitfield_extract: - emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]); + case ir_triop_bitfield_extract: { + st_src_reg is_ge32 = get_temp(glsl_type::uint_type); + st_src_reg bfe = get_temp(ir->type); + emit_asm(ir, TGSI_OPCODE_ISGE, st_dst_reg(is_ge32), op[2], st_src_reg_for_int(32)); + emit_asm(ir, TGSI_OPCODE_IBFE, st_dst_reg(bfe), op[0], op[1], op[2]); + emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, is_ge32, op[0], bfe); break; - case ir_quadop_bitfield_insert: - emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]); + } + case ir_quadop_bitfield_insert: { + st_src_reg is_ge32 = get_temp(glsl_type::uint_type); + st_src_reg bfi = get_temp(ir->type); + emit_asm(ir, TGSI_OPCODE_ISGE, st_dst_reg(is_ge32), op[3], st_src_reg_for_int(32)); + emit_asm(ir, TGSI_OPCODE_BFI, st_dst_reg(bfi), op[0], op[1], op[2], op[3]); + emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, is_ge32, op[1], bfi); break; + } case ir_unop_bitfield_reverse: emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]); break; case ir_unop_bit_count: emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]); break; case ir_unop_find_msb: emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]); break; case ir_unop_find_lsb: -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev