We cannot zero out the destination register before the conversion if it overlaps with the source, because doing so would clobber the source before it is read. Use an Align1 instruction instead to zero out the high 16 bits after the conversion to half float. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 36 ++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index fa06fd6..9d2e65d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1127,7 +1127,15 @@ brw_inst * brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src) { const struct brw_context *brw = p->brw; - bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16; + const bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16; + /* The F32TO16 instruction doesn't support 32-bit destination types in + * Align1 mode, and neither does the Gen8 implementation in terms of a + * converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as + * an undocumented feature. + */ + const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD && + brw->gen >= 8); + brw_inst *inst; if (align16) { assert(dst.type == BRW_REGISTER_TYPE_UD); @@ -1137,18 +1145,28 @@ brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src) dst.type == BRW_REGISTER_TYPE_HF); } + brw_push_insn_state(p); + + if (needs_zero_fill) { + brw_set_default_access_mode(p, BRW_ALIGN_1); + dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2); + } + if (brw->gen >= 8) { - if (align16) { - /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's - * emit_pack_half_2x16 method.) 
- */ - brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)); - } - return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src); + inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src); } else { assert(brw->gen == 7); - return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src); + inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src); + } + + if (needs_zero_fill) { + brw_inst_set_no_dd_clear(brw, inst, true); + inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u)); + brw_inst_set_no_dd_check(brw, inst, true); } + + brw_pop_insn_state(p); + return inst; } brw_inst * -- 2.1.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev