We cannot zero out the destination register if it overlaps with the
source.  Use an Align1 instruction instead to zero out the high 16
bits after the conversion to half float.
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 36 ++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index fa06fd6..9d2e65d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1127,7 +1127,15 @@ brw_inst *
 brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
 {
    const struct brw_context *brw = p->brw;
-   bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+   const bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+   /* The F32TO16 instruction doesn't support 32-bit destination types in
+    * Align1 mode, and neither does the Gen8 implementation in terms of a
+    * converting MOV.  Gen7 does zero out the high 16 bits in Align16 mode as
+    * an undocumented feature.
+    */
+   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
+                                 brw->gen >= 8);
+   brw_inst *inst;
 
    if (align16) {
       assert(dst.type == BRW_REGISTER_TYPE_UD);
@@ -1137,18 +1145,28 @@ brw_F32TO16(struct brw_compile *p, struct brw_reg dst, 
struct brw_reg src)
              dst.type == BRW_REGISTER_TYPE_HF);
    }
 
+   brw_push_insn_state(p);
+
+   if (needs_zero_fill) {
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
+   }
+
    if (brw->gen >= 8) {
-      if (align16) {
-         /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's
-          * emit_pack_half_2x16 method.)
-          */
-         brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
-      }
-      return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
    } else {
       assert(brw->gen == 7);
-      return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+   }
+
+   if (needs_zero_fill) {
+      brw_inst_set_no_dd_clear(brw, inst, true);
+      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
+      brw_inst_set_no_dd_check(brw, inst, true);
    }
+
+   brw_pop_insn_state(p);
+   return inst;
 }
 
 brw_inst *
-- 
2.1.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to