Chema Casanova <jmcasan...@igalia.com> writes: > El 25/08/17 a las 20:09, Francisco Jerez escribió: >> Alejandro Piñeiro <apinhe...@igalia.com> writes: >> >>> Although it is possible to emit them directly as AND/OR on brw_fs_nir, >>> having specific opcodes makes it easier to remove duplicate settings >>> later. >>> >>> Signed-off-by: Alejandro Piñeiro <apinhe...@igalia.com> >>> Signed-off-by: Jose Maria Casanova Crespo <jmcasan...@igalia.com> >>> --- >>> src/intel/compiler/brw_eu.h | 3 +++ >>> src/intel/compiler/brw_eu_defines.h | 9 +++++++++ >>> src/intel/compiler/brw_eu_emit.c | 19 +++++++++++++++++++ >>> src/intel/compiler/brw_fs_generator.cpp | 8 ++++++++ >>> src/intel/compiler/brw_shader.cpp | 5 +++++ >>> 5 files changed, 44 insertions(+) >>> >>> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h >>> index a3a9c63239d..0a7f8020398 100644 >>> --- a/src/intel/compiler/brw_eu.h >>> +++ b/src/intel/compiler/brw_eu.h >>> @@ -500,6 +500,9 @@ brw_broadcast(struct brw_codegen *p, >>> struct brw_reg src, >>> struct brw_reg idx); >>> >>> +void >>> +brw_rounding_mode(struct brw_codegen *p, >>> + enum brw_rnd_mode mode); >>> /*********************************************************************** >>> * brw_eu_util.c: >>> */ >>> diff --git a/src/intel/compiler/brw_eu_defines.h >>> b/src/intel/compiler/brw_eu_defines.h >>> index 1af835d47ed..50435df2fcf 100644 >>> --- a/src/intel/compiler/brw_eu_defines.h >>> +++ b/src/intel/compiler/brw_eu_defines.h >>> @@ -388,6 +388,9 @@ enum opcode { >>> SHADER_OPCODE_TYPED_SURFACE_WRITE, >>> SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, >>> >>> + SHADER_OPCODE_RND_MODE_RTE, >>> + SHADER_OPCODE_RND_MODE_RTZ, >>> + >> We don't need an opcode for each possible rounding mode (there's also RU >> and RD). How about you add a single SHADER_OPCODE_RND_MODE opcode >> taking an immediate with the right rounding mode? > I like the proposal. It is better having a unique opcode for setting the > rounding mode. Changed for v2 of this patch. >> Also, you should be marking the rounding mode opcodes as >> has_side_effects(), because otherwise you're giving the scheduler the >> freedom of moving your rounding mode update instruction past the >> instruction you wanted it to have an effect on... > Well pointed, we already realized that it was missing while debugging > the latency problem of the control register. It was hiding the problem > re-scheduling the cr0 modification to the beginning of the shader. >>> SHADER_OPCODE_MEMORY_FENCE, >>> >>> SHADER_OPCODE_GEN4_SCRATCH_READ, >>> @@ -1233,4 +1236,10 @@ enum brw_message_target { >>> /* R0 */ >>> # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 >>> >>> +enum PACKED brw_rnd_mode { >>> + BRW_RND_MODE_UNSPECIFIED, >>> + BRW_RND_MODE_RTE, >>> + BRW_RND_MODE_RTZ, >> Since you're introducing a back-end-specific rounding mode enum already, >> why not use the hardware values right away so you avoid hard-coding >> magic constants below. > At the end we removed MODE_UNSPECIFIED as it isn't really needed and we > include Round Up and Down in the enum assignation based using the PRM > values. Also renamed RTE for RTNE to maintain coherence with nir > conversion modifiers using the same acronym as PRM. > > +enum PACKED brw_rnd_mode { > + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ > + BRW_RND_MODE_RU = 1, /* Round Up, toward +inf */ > + BRW_RND_MODE_RD = 2, /* Round Down, toward -inf */ > + BRW_RND_MODE_RTZ = 3 /* Round Toward Zero */ > +}; > > I have a doubt about how to avoid the magic constants to close the v2 of > this patch. One approach would be using the same code structure and > taking advantage using the codification of rounding field. This way we > just formula and expect that the C compiler optimizer to guess that the > immediate value is a constant. >
Hm, I'm not particularly worried about the C++ compiler behavior, it's most likely immaterial. > switch (mode) { > case BRW_RND_MODE_RTZ: > - inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), > brw_imm_ud(0x00000030u)); > + inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), > brw_imm_ud(((unsigned int) mode << 4))); > break; > - case BRW_RND_MODE_RTE: > - inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), > brw_imm_ud(0xffffffcfu)); > + case BRW_RND_MODE_RTNE: > + inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), > brw_imm_ud(((unsigned int) mode << 4) | ~0x00000030u)); > break; > default: > I don't think this is particularly advantageous since you still need to special-case every possible rounding mode. > Another approach could be to implement a general solution for all > supported rounding modes by the hw including Round Up and Down using > bitwise operations. > > /** > * Changes the floating point rounding mode updating the control register > * field defined at cr0.0[5-6] bits. This function supports the changes to > * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise > operations. > * Only RTNE and RTZ rounding are enabled at nir. > */ > > void > brw_rounding_mode(struct brw_codegen *p, > enum brw_rnd_mode mode) > { > > const unsigned int mask = 0x0000000030u; > const unsigned int enable_bits = ((unsigned int) mode) << 4; > const unsigned int disable_bits = enable_bits | ~mask; > > /* Used by RTNE rounding to set field to 00 */ > if (disable_bits != ~0) { > brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(disable_bits)); > } > /* Used by RTZ rounding to set field to 11 */ > if (enable_bits) { > brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(enable_bits)); > } > } > Looks good (except for the SWITCH thread control flag still missing), though you could simplify it slightly: | void | brw_rounding_mode(struct brw_codegen *p, enum brw_rnd_mode mode) | { | const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT; | | if (bits != BRW_RND_MODE_MASK) { | brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), | brw_imm_ud(~BRW_RND_MODE_MASK)); | brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); | } | | if (bits) { | brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), | brw_imm_ud(bits)); | brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); | } | } > Any preference ? > > Thanks for the review. > > Chema > >> >>> +}; >>> + >>> #endif /* BRW_EU_DEFINES_H */ >>> diff --git a/src/intel/compiler/brw_eu_emit.c >>> b/src/intel/compiler/brw_eu_emit.c >>> index 0b0d67a5c56..07ad3d9384b 100644 >>> --- a/src/intel/compiler/brw_eu_emit.c >>> +++ b/src/intel/compiler/brw_eu_emit.c >>> @@ -3723,3 +3723,22 @@ brw_WAIT(struct brw_codegen *p) >>> brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); >>> brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); >>> } >>> + >>> +void >>> +brw_rounding_mode(struct brw_codegen *p, >>> + enum brw_rnd_mode mode) >>> +{ >>> + switch (mode) { >>> + case BRW_RND_MODE_UNSPECIFIED: >>> + /* nothing to do here */ >>> + break; >>> + case BRW_RND_MODE_RTZ: >>> + brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0x00000030u)); >>> + break; >>> + case BRW_RND_MODE_RTE: >>> + brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), brw_imm_ud(0xffffffcfu)); >> This has undefined behavior because the ALU instructions you use to set >> cr0 have non-zero latency, so the rounding mode change won't take effect >> till ~8 cycles after the instruction is issued. Any instructions issued >> in that window will pick up the wrong rounding mode. This is likely one >> of the reasons for your observations off-list regarding some shaders >> using the right or wrong rounding mode non-deterministically depending >> on the scheduler's behaviour. >> >> Here's a spec quote from the SKL PRM suggesting a workaround you should >> probably include in this commit: >> >> | Implementation Restriction on Register Access: When the control >> | register is used as an explicit source and/or destination, hardware >> | does not ensure execution pipeline coherency. Software must set the >> | thread control field to ‘switch’ for an instruction that uses control >> | register as an explicit operand. This is important as the control >> | register is an implicit source for most instructions. For example, >> | fields like FPMode and Accumulator Disable control the arithmetic >> | and/or logic instructions. Therefore, if the instruction updating the >> | control register doesn’t set ‘switch’, subsequent instructions may >> | have undefined results. >> >> >>> + break; >>> + default: >>> + unreachable("Not reached"); >>> + } >>> +} >>> diff --git a/src/intel/compiler/brw_fs_generator.cpp >>> b/src/intel/compiler/brw_fs_generator.cpp >>> index 2ade486705b..e0bd191ea7e 100644 >>> --- a/src/intel/compiler/brw_fs_generator.cpp >>> +++ b/src/intel/compiler/brw_fs_generator.cpp >>> @@ -2139,6 +2139,14 @@ fs_generator::generate_code(const cfg_t *cfg, int >>> dispatch_width) >>> brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F)); >>> break; >>> >>> + case SHADER_OPCODE_RND_MODE_RTE: >>> + brw_rounding_mode(p, BRW_RND_MODE_RTE); >>> + break; >>> + >>> + case SHADER_OPCODE_RND_MODE_RTZ: >>> + brw_rounding_mode(p, BRW_RND_MODE_RTZ); >>> + break; >>> + >>> default: >>> unreachable("Unsupported opcode"); >>> >>> diff --git a/src/intel/compiler/brw_shader.cpp >>> b/src/intel/compiler/brw_shader.cpp >>> index c62b8ba6140..f22e204e262 100644 >>> --- a/src/intel/compiler/brw_shader.cpp >>> +++ b/src/intel/compiler/brw_shader.cpp >>> @@ -486,6 +486,11 @@ brw_instruction_name(const struct gen_device_info >>> *devinfo, enum opcode op) >>> return "tes_add_indirect_urb_offset"; >>> case TES_OPCODE_GET_PRIMITIVE_ID: >>> return "tes_get_primitive_id"; >>> + >>> + case SHADER_OPCODE_RND_MODE_RTE: >>> + return "round_mode_rte"; >>> + case SHADER_OPCODE_RND_MODE_RTZ: >>> + return "round_mode_rtz"; >>> } >>> >>> unreachable("not reached"); >>> -- >>> 2.11.0 >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >>> >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev