On Wed, 2019-01-02 at 11:46 +0200, Pohjolainen, Topi wrote: > On Wed, Dec 19, 2018 at 12:51:03PM +0100, Iago Toral Quiroga wrote: > > Broadwell hardware has a bug that manifests in SIMD8 executions of > > 16-bit MAD instructions when any of the sources is a Y or W > > component. > > We pack these components in the same SIMD register as components X > > and > > Z respectively, but starting at offset 16B (so they live in the > > second > > half of the register). The problem does not exist in SKL or later. > > > > We work around this issue by moving any such sources to a temporary > > starting at offset 0B. We want to do this after the main > > optimization loop > > to prevent copy-propagation and friends to undo the fix. > > --- > > src/intel/compiler/brw_fs.cpp | 48 > > +++++++++++++++++++++++++++++++++++ > > src/intel/compiler/brw_fs.h | 1 + > > 2 files changed, 49 insertions(+) > > > > diff --git a/src/intel/compiler/brw_fs.cpp > > b/src/intel/compiler/brw_fs.cpp > > index 933b0b6ffc4..1343c2f4993 100644 > > --- a/src/intel/compiler/brw_fs.cpp > > +++ b/src/intel/compiler/brw_fs.cpp > > @@ -6449,6 +6449,48 @@ fs_visitor::optimize() > > validate(); > > } > > > > +/** > > + * Broadwell hardware has a bug that manifests in SIMD8 executions > > of 16-bit > > + * MAD instructions when any of the sources is a Y or W component. > > We pack > > + * these components in the same SIMD register as components X and > > Z > > + * respectively, but starting at offset 16B (so they live in the > > second half > > + * of the register). > > + * > > + * We work around this issue by moving any such sources to a > > temporary > > + * starting at offset 0B. We want to do this after the main > > optimization loop > > + * to prevent copy-propagation and friends to undo the fix. > > + */ > > +void > > +fs_visitor::fixup_hf_mad() > > +{ > > + if (devinfo->gen > 8) > > We don't want to run this for gen < 8 either as it would iterate the > instructions in vain. So just: > > if (devinfo->gen == 8)
Right, good point. It should be "if (devinfo->gen != 8) though. I'll fix this, thanks! > Otherwise: > > Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > > > + return; > > + > > + bool progress = false; > > + > > + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { > > + if (inst->opcode != BRW_OPCODE_MAD || > > + inst->dst.type != BRW_REGISTER_TYPE_HF || > > + inst->exec_size > 8) > > + continue; > > + > > + for (int i = 0; i < 3; i++) { > > + if (inst->src[i].offset > 0) { > > + assert(inst->src[i].type == BRW_REGISTER_TYPE_HF); > > + const fs_builder ibld = > > + bld.at(block, inst).exec_all().group(inst- > > >exec_size, 0); > > + fs_reg tmp = ibld.vgrf(inst->src[i].type); > > + ibld.MOV(tmp, inst->src[i]); > > + inst->src[i] = tmp; > > + progress = true; > > + } > > + } > > + } > > + > > + if (progress) > > + invalidate_live_intervals(); > > +} > > + > > /** > > * Three source instruction must have a GRF/MRF destination > > register. > > * ARF NULL is not allowed. Fix that up by allocating a temporary > > GRF. > > @@ -6607,6 +6649,7 @@ fs_visitor::run_vs() > > assign_curb_setup(); > > assign_vs_urb_setup(); > > > > + fixup_hf_mad(); > > fixup_3src_null_dest(); > > allocate_registers(8, true); > > > > @@ -6691,6 +6734,7 @@ fs_visitor::run_tcs_single_patch() > > assign_curb_setup(); > > assign_tcs_single_patch_urb_setup(); > > > > + fixup_hf_mad(); > > fixup_3src_null_dest(); > > allocate_registers(8, true); > > > > @@ -6725,6 +6769,7 @@ fs_visitor::run_tes() > > assign_curb_setup(); > > assign_tes_urb_setup(); > > > > + fixup_hf_mad(); > > fixup_3src_null_dest(); > > allocate_registers(8, true); > > > > @@ -6774,6 +6819,7 @@ fs_visitor::run_gs() > > assign_curb_setup(); > > assign_gs_urb_setup(); > > > > + fixup_hf_mad(); > > fixup_3src_null_dest(); > > allocate_registers(8, true); > > > > @@ -6874,6 +6920,7 @@ fs_visitor::run_fs(bool allow_spilling, bool > > do_rep_send) > > > > assign_urb_setup(); > > > > + fixup_hf_mad(); > > fixup_3src_null_dest(); > > allocate_registers(8, allow_spilling); > > > > @@ -6918,6 +6965,7 @@ fs_visitor::run_cs(unsigned > > min_dispatch_width) > > > > assign_curb_setup(); > > > > + fixup_hf_mad(); > > fixup_3src_null_dest(); > > allocate_registers(min_dispatch_width, true); > > > > diff --git a/src/intel/compiler/brw_fs.h > > b/src/intel/compiler/brw_fs.h > > index 163c0008820..f79f8554fb9 100644 > > --- a/src/intel/compiler/brw_fs.h > > +++ b/src/intel/compiler/brw_fs.h > > @@ -103,6 +103,7 @@ public: > > void setup_vs_payload(); > > void setup_gs_payload(); > > void setup_cs_payload(); > > + void fixup_hf_mad(); > > void fixup_3src_null_dest(); > > void assign_curb_setup(); > > void calculate_urb_setup(); > > -- > > 2.17.1 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev