From Message Descriptor section in gfxspecs: "Memory fence messages without Commit Enable set do not return anything to the thread (response length is 0 and destination register is null)."
This fixes a GPU hang in simulation in the piglit test arb_shader_image_load_store-shader-mem-barrier The mem fence message doesn't send any data, and previously we were setting the SEND's src0 to the same register as the destination. I've kept that behavior, so src0 will now be the null register in a number of cases, necessitating a few changes in the EU validator. The simulator and real hardware seem to be okay with this. --- src/intel/compiler/brw_eu_emit.c | 4 ++-- src/intel/compiler/brw_eu_validate.c | 13 +++++++++++-- src/intel/compiler/brw_fs_nir.cpp | 14 +++++++++++--- src/intel/compiler/test_eu_validate.cpp | 9 +++++++++ 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index f039af56d05..fe7fa8723e1 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3289,8 +3289,8 @@ brw_memory_fence(struct brw_codegen *p, { const struct gen_device_info *devinfo = p->devinfo; const bool commit_enable = - devinfo->gen >= 10 || /* HSD ES # 1404612949 */ - (devinfo->gen == 7 && !devinfo->is_haswell); + !(dst.file == BRW_ARCHITECTURE_REGISTER_FILE && + dst.nr == BRW_ARF_NULL); struct brw_inst *insn; brw_push_insn_state(p); diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index d3189d1ef5e..e16dfc3aaf3 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -168,6 +168,14 @@ src1_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *in brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; } +static bool +is_mfence(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND && + brw_inst_sfid(devinfo, inst) == GEN7_SFID_DATAPORT_DATA_CACHE && + brw_inst_dp_msg_type(devinfo, inst) == GEN7_DATAPORT_DC_MEMORY_FENCE; +} + static unsigned num_sources_from_inst(const struct gen_device_info *devinfo, const brw_inst *inst) @@ -236,7 +244,7 @@ sources_not_null(const struct gen_device_info *devinfo, if (num_sources == 3) return (struct string){}; - if (num_sources >= 1) + if (num_sources >= 1 && !is_mfence(devinfo, inst)) ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); if (num_sources == 2) @@ -256,7 +264,8 @@ send_restrictions(const struct gen_device_info *devinfo, "send must use direct addressing"); if (devinfo->gen >= 7) { - ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF"); + ERROR_IF(!src0_is_grf(devinfo, inst) && !is_mfence(devinfo, inst), + "send from non-GRF"); ERROR_IF(brw_inst_eot(devinfo, inst) && brw_inst_src0_da_reg_nr(devinfo, inst) < 112, "send with EOT must use g112-g127"); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index dbd2105f7e9..063f0256829 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3859,9 +3859,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_memory_barrier_image: case nir_intrinsic_memory_barrier: { const fs_builder ubld = bld.group(8, 0); - const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); - ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) - ->size_written = 2 * REG_SIZE; + if (devinfo->gen == 7 && !devinfo->is_haswell) { + const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); + ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) + ->size_written = 2 * REG_SIZE; + } else { + const fs_reg tmp = + /* HSD ES #1404612949 */ + devinfo->gen >= 10 ? ubld.vgrf(BRW_REGISTER_TYPE_UD) + : bld.null_reg_d(); + ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp); + } break; } diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp index 161db994b2b..8169f951b2d 100644 --- a/src/intel/compiler/test_eu_validate.cpp +++ b/src/intel/compiler/test_eu_validate.cpp @@ -168,6 +168,15 @@ TEST_P(validation_test, math_src1_null_reg) } } +TEST_P(validation_test, mfence_src0_null_reg) +{ + /* On HSW+ mfence's src0 is the null register */ + if (devinfo.gen >= 8 || devinfo.is_haswell) { + brw_memory_fence(p, null); + EXPECT_TRUE(validate(p)); + } +} + TEST_P(validation_test, opcode46) { /* opcode 46 is "push" on Gen 4 and 5 -- 2.16.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev