Matt Turner <matts...@gmail.com> writes: > From Message Descriptor section in gfxspecs: > > "Memory fence messages without Commit Enable set do not return > anything to the thread (response length is 0 and destination > register is null)." > > This fixes a GPU hang in simulation in the piglit test > arb_shader_image_load_store-shader-mem-barrier >
On what platform? > The mem fence message doesn't send any data, and previously we were > setting the SEND's src0 to the same register as the destination. I've > kept that behavior, so src0 will now be the null register in a number of > cases, necessitating a few changes in the EU validator. The simulator > and real hardware seem to be okay with this. > --- > src/intel/compiler/brw_eu_emit.c | 4 ++-- > src/intel/compiler/brw_eu_validate.c | 13 +++++++++++-- > src/intel/compiler/brw_fs_nir.cpp | 14 +++++++++++--- > src/intel/compiler/test_eu_validate.cpp | 9 +++++++++ > 4 files changed, 33 insertions(+), 7 deletions(-) > > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > index f039af56d05..fe7fa8723e1 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -3289,8 +3289,8 @@ brw_memory_fence(struct brw_codegen *p, > { > const struct gen_device_info *devinfo = p->devinfo; > const bool commit_enable = > - devinfo->gen >= 10 || /* HSD ES # 1404612949 */ > - (devinfo->gen == 7 && !devinfo->is_haswell); > + !(dst.file == BRW_ARCHITECTURE_REGISTER_FILE && > + dst.nr == BRW_ARF_NULL); > struct brw_inst *insn; > > brw_push_insn_state(p); > diff --git a/src/intel/compiler/brw_eu_validate.c > b/src/intel/compiler/brw_eu_validate.c > index d3189d1ef5e..e16dfc3aaf3 100644 > --- a/src/intel/compiler/brw_eu_validate.c > +++ b/src/intel/compiler/brw_eu_validate.c > @@ -168,6 +168,14 @@ src1_has_scalar_region(const struct gen_device_info > *devinfo, const brw_inst *in > brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; > } > > +static bool > +is_mfence(const struct gen_device_info *devinfo, const brw_inst *inst) > +{ > + return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND && > + brw_inst_sfid(devinfo, inst) == GEN7_SFID_DATAPORT_DATA_CACHE && > + brw_inst_dp_msg_type(devinfo, inst) == > GEN7_DATAPORT_DC_MEMORY_FENCE; > +} > + > static unsigned > num_sources_from_inst(const struct gen_device_info *devinfo, > const brw_inst *inst) > @@ -236,7 +244,7 @@ sources_not_null(const struct gen_device_info *devinfo, > if (num_sources == 3) > return (struct string){}; > > - if (num_sources >= 1) > + if (num_sources >= 1 && !is_mfence(devinfo, inst)) > ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); > > if (num_sources == 2) > @@ -256,7 +264,8 @@ send_restrictions(const struct gen_device_info *devinfo, > "send must use direct addressing"); > > if (devinfo->gen >= 7) { > - ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF"); > + ERROR_IF(!src0_is_grf(devinfo, inst) && !is_mfence(devinfo, inst), > + "send from non-GRF"); > ERROR_IF(brw_inst_eot(devinfo, inst) && > brw_inst_src0_da_reg_nr(devinfo, inst) < 112, > "send with EOT must use g112-g127"); > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index dbd2105f7e9..063f0256829 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -3859,9 +3859,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > case nir_intrinsic_memory_barrier_image: > case nir_intrinsic_memory_barrier: { > const fs_builder ubld = bld.group(8, 0); > - const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); > - ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) > - ->size_written = 2 * REG_SIZE; > + if (devinfo->gen == 7 && !devinfo->is_haswell) { > + const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); > + ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) > + ->size_written = 2 * REG_SIZE; > + } else { > + const fs_reg tmp = > + /* HSD ES #1404612949 */ > + devinfo->gen >= 10 ? ubld.vgrf(BRW_REGISTER_TYPE_UD) > + : bld.null_reg_d(); > + ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp); > + } > break; > } > > diff --git a/src/intel/compiler/test_eu_validate.cpp > b/src/intel/compiler/test_eu_validate.cpp > index 161db994b2b..8169f951b2d 100644 > --- a/src/intel/compiler/test_eu_validate.cpp > +++ b/src/intel/compiler/test_eu_validate.cpp > @@ -168,6 +168,15 @@ TEST_P(validation_test, math_src1_null_reg) > } > } > > +TEST_P(validation_test, mfence_src0_null_reg) > +{ > + /* On HSW+ mfence's src0 is the null register */ > + if (devinfo.gen >= 8 || devinfo.is_haswell) { > + brw_memory_fence(p, null); > + EXPECT_TRUE(validate(p)); > + } > +} > + > TEST_P(validation_test, opcode46) > { > /* opcode 46 is "push" on Gen 4 and 5 > -- > 2.16.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev