On 25/08/17 20:09, Francisco Jerez wrote: > Alejandro Piñeiro <apinhe...@igalia.com> writes: > >> Although it is possible to emit them directly as AND/OR on brw_fs_nir, >> having specific opcodes makes it easier to remove duplicate settings >> later. >> >> Signed-off-by: Alejandro Piñeiro <apinhe...@igalia.com> >> Signed-off-by: Jose Maria Casanova Crespo <jmcasan...@igalia.com> >> --- >> src/intel/compiler/brw_eu.h | 3 +++ >> src/intel/compiler/brw_eu_defines.h | 9 +++++++++ >> src/intel/compiler/brw_eu_emit.c | 19 +++++++++++++++++++ >> src/intel/compiler/brw_fs_generator.cpp | 8 ++++++++ >> src/intel/compiler/brw_shader.cpp | 5 +++++ >> 5 files changed, 44 insertions(+) >> >> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h >> index a3a9c63239d..0a7f8020398 100644 >> --- a/src/intel/compiler/brw_eu.h >> +++ b/src/intel/compiler/brw_eu.h >> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p, >> struct brw_reg src, >> struct brw_reg idx); >> >> +void >> +brw_rounding_mode(struct brw_codegen *p, >> + enum brw_rnd_mode mode); >> /*********************************************************************** >> * brw_eu_util.c: >> */ >> diff --git a/src/intel/compiler/brw_eu_defines.h >> b/src/intel/compiler/brw_eu_defines.h >> index 1af835d47ed..50435df2fcf 100644 >> --- a/src/intel/compiler/brw_eu_defines.h >> +++ b/src/intel/compiler/brw_eu_defines.h >> @@ -388,6 +388,9 @@ enum opcode { >> SHADER_OPCODE_TYPED_SURFACE_WRITE, >> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, >> >> + SHADER_OPCODE_RND_MODE_RTE, >> + SHADER_OPCODE_RND_MODE_RTZ, >> + > We don't need an opcode for each possible rounding mode (there's also RU > and RD). How about you add a single SHADER_OPCODE_RND_MODE opcode > taking an immediate with the right rounding mode? > > Also, you should be marking the rounding mode opcodes as > has_side_effects(), because otherwise you're giving the scheduler the > freedom of moving your rounding mode update instruction past the > instruction you wanted it to have an effect on... > >> SHADER_OPCODE_MEMORY_FENCE, >> >> SHADER_OPCODE_GEN4_SCRATCH_READ, >> @@ -1233,4 +1236,10 @@ enum brw_message_target { >> /* R0 */ >> # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 >> >> +enum PACKED brw_rnd_mode { >> + BRW_RND_MODE_UNSPECIFIED, >> + BRW_RND_MODE_RTE, >> + BRW_RND_MODE_RTZ, > Since you're introducing a back-end-specific rounding mode enum already, > why not use the hardware values right away so you avoid hard-coding > magic constants below. > >> +}; >> + >> #endif /* BRW_EU_DEFINES_H */ >> diff --git a/src/intel/compiler/brw_eu_emit.c >> b/src/intel/compiler/brw_eu_emit.c >> index 0b0d67a5c56..07ad3d9384b 100644 >> --- a/src/intel/compiler/brw_eu_emit.c >> +++ b/src/intel/compiler/brw_eu_emit.c >> @@ -3723,3 +3723,22 @@ brw_WAIT(struct brw_codegen *p) >> brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); >> brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); >> } >> + >> +void >> +brw_rounding_mode(struct brw_codegen *p, >> + enum brw_rnd_mode mode) >> +{ >> + switch (mode) { >> + case BRW_RND_MODE_UNSPECIFIED: >> + /* nothing to do here */ >> + break; >> + case BRW_RND_MODE_RTZ: >> + brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0x00000030u)); >> + break; >> + case BRW_RND_MODE_RTE: >> + brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0xffffffcfu)); > This has undefined behavior because the ALU instructions you use to set > cr0 have non-zero latency, so the rounding mode change won't take effect > till ~8 cycles after the instruction is issued. Any instructions issued > in that window will pick up the wrong rounding mode. This is likely one > of the reasons for your observations off-list regarding some shaders > using the right or wrong rounding mode non-deterministically depending > on the scheduler's behaviour. > > Here's a spec quote from the SKL PRM suggesting a workaround you should > probably include in this commit: > > | Implementation Restriction on Register Access: When the control > | register is used as an explicit source and/or destination, hardware > | does not ensure execution pipeline coherency. Software must set the > | thread control field to ‘switch’ for an instruction that uses control > | register as an explicit operand. This is important as the control > | register is an implicit source for most instructions. For example, > | fields like FPMode and Accumulator Disable control the arithmetic > | and/or logic instructions. Therefore, if the instruction updating the > | control register doesn’t set ‘switch’, subsequent instructions may > | have undefined results.
Ok, thanks for the spec reference. I will use it to update the patch (or create a new one). BR > > >> + break; >> + default: >> + unreachable("Not reached"); >> + } >> +} >> diff --git a/src/intel/compiler/brw_fs_generator.cpp >> b/src/intel/compiler/brw_fs_generator.cpp >> index 2ade486705b..e0bd191ea7e 100644 >> --- a/src/intel/compiler/brw_fs_generator.cpp >> +++ b/src/intel/compiler/brw_fs_generator.cpp >> @@ -2139,6 +2139,14 @@ fs_generator::generate_code(const cfg_t *cfg, int >> dispatch_width) >> brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F)); >> break; >> >> + case SHADER_OPCODE_RND_MODE_RTE: >> + brw_rounding_mode(p, BRW_RND_MODE_RTE); >> + break; >> + >> + case SHADER_OPCODE_RND_MODE_RTZ: >> + brw_rounding_mode(p, BRW_RND_MODE_RTZ); >> + break; >> + >> default: >> unreachable("Unsupported opcode"); >> >> diff --git a/src/intel/compiler/brw_shader.cpp >> b/src/intel/compiler/brw_shader.cpp >> index c62b8ba6140..f22e204e262 100644 >> --- a/src/intel/compiler/brw_shader.cpp >> +++ b/src/intel/compiler/brw_shader.cpp >> @@ -486,6 +486,11 @@ brw_instruction_name(const struct gen_device_info >> *devinfo, enum opcode op) >> return "tes_add_indirect_urb_offset"; >> case TES_OPCODE_GET_PRIMITIVE_ID: >> return "tes_get_primitive_id"; >> + >> + case SHADER_OPCODE_RND_MODE_RTE: >> + return "round_mode_rte"; >> + case SHADER_OPCODE_RND_MODE_RTZ: >> + return "round_mode_rtz"; >> } >> >> unreachable("not reached"); >> -- >> 2.11.0 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev