> On 31 Jul 2024, at 11:31, Claudio Bantaloukas <claudio.bantalou...@arm.com> > wrote: > > External email: Use caution opening links or attachments > > > On 31/07/2024 08:57, Kyrylo Tkachov wrote: >> Hi Claudio, >> >>> On 31 Jul 2024, at 08:29, Claudio Bantaloukas <claudio.bantalou...@arm.com> >>> wrote: >>> >>> External email: Use caution opening links or attachments >>> >>> >>> Unlike most system registers, fpmr can be heavily written to in code that >>> exercises the fp8 functionality. That is because every fp8 instrinsic call >>> can potentially change the value of fpmr. >>> Rather than just use an unspec, we treat the fpmr system register like >>> all other registers and use a move operation to read and write to it. >>> >>> We introduce a new class of moveable system registers that, currently, >>> only accepts fpmr and a new constraint, Umv, that allows us to >>> selectively use mrs and msr instructions when expanding rtl for them. >>> Given that there is code that depends on "real" registers coming before >>> "fake" ones, we introduce a new constant FPM_REGNUM that uses an >>> existing value and renumber registers below that. >>> This requires us to update the bitmaps that describe which registers >>> belong to each register class. >> >> So I like the approach though I’ll let Richard review the implementation >> details here. >> My only slight concern here is compatibility with LLVM. I notice that LLVM >> doesn’t accept the test case you’ve included as it doesn’t understand “fpmr” >> in its inline assembly. It also doesn’t support the new constraint, of >> course. >> Do you know if there are plans to teach LLVM these inline assembly >> constructs to avoid creating GCC-only sources for fp8? > > Hi Kyrill, > I asked and got confirmation that a patch to add fpmr as a register > should land soon.
Great, thanks for confirming. Kyrill > > Cheers, > Claudio > >> Thanks, >> Kyrill >> >> >>> >>> gcc/ChangeLog: >>> >>> * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add >>> support for MOVEABLE_SYSREGS class. >>> (aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr. >>> (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class. >>> (aarch64_class_max_nregs): Likewise. >>> * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr. >>> (CALL_REALLY_USED_REGISTERS): Likewise. >>> (REGISTER_NAMES): Likewise. >>> (enum reg_class): Add MOVEABLE_SYSREGS class. >>> (REG_CLASS_NAMES): Likewise. >>> (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr, >>> the new MOVEABLE_REGS class and renumbering of registers. >>> * config/aarch64/aarch64.md: (FPM_REGNUM): added new register >>> number, reusing old value. >>> (FFR_REGNUM): Renumber. >>> (FFRT_REGNUM): Likewise. >>> (LOWERING_REGNUM): Likewise. >>> (TPIDR2_BLOCK_REGNUM): Likewise. >>> (SME_STATE_REGNUM): Likewise. >>> (TPIDR2_SETUP_REGNUM): Likewise. >>> (ZA_FREE_REGNUM): Likewise. >>> (ZA_SAVED_REGNUM): Likewise. >>> (ZA_REGNUM): Likewise. >>> (ZT0_REGNUM): Likewise. >>> (*mov<mode>_aarch64): Add support for moveable sysregs. >>> (*movsi_aarch64): Likewise. >>> (*movdi_aarch64): Likewise. >>> * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint. >>> >>> gcc/testsuite/ChangeLog: >>> >>> * gcc.target/aarch64/acle/fp8.c: New tests. >>> --- >>> gcc/config/aarch64/aarch64.cc | 8 ++ >>> gcc/config/aarch64/aarch64.h | 14 ++- >>> gcc/config/aarch64/aarch64.md | 30 ++++-- >>> gcc/config/aarch64/constraints.md | 3 + >>> gcc/testsuite/gcc.target/aarch64/acle/fp8.c | 101 ++++++++++++++++++++ >>> 5 files changed, 142 insertions(+), 14 deletions(-) >>> >>> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc >>> index e0cf382998c..9810f2c0390 100644 >>> --- a/gcc/config/aarch64/aarch64.cc >>> +++ b/gcc/config/aarch64/aarch64.cc >>> @@ -2018,6 +2018,7 @@ aarch64_hard_regno_nregs (unsigned regno, >>> machine_mode mode) >>> case PR_HI_REGS: >>> return mode == VNx32BImode ? 2 : 1; >>> >>> + case MOVEABLE_SYSREGS: >>> case FFR_REGS: >>> case PR_AND_FFR_REGS: >>> case FAKE_REGS: >>> @@ -2045,6 +2046,9 @@ aarch64_hard_regno_mode_ok (unsigned regno, >>> machine_mode mode) >>> /* This must have the same size as _Unwind_Word. */ >>> return mode == DImode; >>> >>> + if (regno == FPM_REGNUM) >>> + return mode == QImode || mode == HImode || mode == SImode || mode == >>> DImode; >>> + >>> unsigned int vec_flags = aarch64_classify_vector_mode (mode); >>> if (vec_flags == VEC_SVE_PRED) >>> return pr_or_ffr_regnum_p (regno); >>> @@ -12680,6 +12684,9 @@ aarch64_regno_regclass (unsigned regno) >>> if (PR_REGNUM_P (regno)) >>> return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS; >>> >>> + if (regno == FPM_REGNUM) >>> + return MOVEABLE_SYSREGS; >>> + >>> if (regno == FFR_REGNUM || regno == FFRT_REGNUM) >>> return FFR_REGS; >>> >>> @@ -13068,6 +13075,7 @@ aarch64_class_max_nregs (reg_class_t regclass, >>> machine_mode mode) >>> case PR_HI_REGS: >>> return mode == VNx32BImode ? 2 : 1; >>> >>> + case MOVEABLE_SYSREGS: >>> case STACK_REG: >>> case FFR_REGS: >>> case PR_AND_FFR_REGS: >>> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h >>> index 2e75c6b81e2..2dfb999bea5 100644 >>> --- a/gcc/config/aarch64/aarch64.h >>> +++ b/gcc/config/aarch64/aarch64.h >>> @@ -523,6 +523,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE >>> ATTRIBUTE_UNUSED >>> 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ >>> 0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \ >>> 0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \ >>> + 1, /* FPMR */ \ >>> 1, 1, /* FFR and FFRT */ \ >>> 1, 1, 1, 1, 1, 1, 1, 1 /* Fake registers */ \ >>> } >>> @@ -547,6 +548,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE >>> ATTRIBUTE_UNUSED >>> 1, 1, 1, 0, /* SFP, AP, CC, VG */ \ >>> 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ >>> 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ >>> + 1, /* FPMR */ \ >>> 1, 1, /* FFR and FFRT */ \ >>> 0, 0, 0, 0, 0, 0, 0, 0 /* Fake registers */ \ >>> } >>> @@ -564,6 +566,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE >>> ATTRIBUTE_UNUSED >>> "sfp", "ap", "cc", "vg", \ >>> "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \ >>> "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \ >>> + "fpmr", \ >>> "ffr", "ffrt", \ >>> "lowering", "tpidr2_block", "sme_state", "tpidr2_setup", \ >>> "za_free", "za_saved", "za", "zt0" \ >>> @@ -775,6 +778,7 @@ enum reg_class >>> PR_REGS, >>> FFR_REGS, >>> PR_AND_FFR_REGS, >>> + MOVEABLE_SYSREGS, >>> FAKE_REGS, >>> ALL_REGS, >>> LIM_REG_CLASSES /* Last */ >>> @@ -801,6 +805,7 @@ enum reg_class >>> "PR_REGS", \ >>> "FFR_REGS", \ >>> "PR_AND_FFR_REGS", \ >>> + "MOVEABLE_SYSREGS", \ >>> "FAKE_REGS", \ >>> "ALL_REGS" \ >>> } >>> @@ -822,10 +827,11 @@ enum reg_class >>> { 0x00000000, 0x00000000, 0x00000ff0 }, /* PR_LO_REGS */ \ >>> { 0x00000000, 0x00000000, 0x000ff000 }, /* PR_HI_REGS */ \ >>> { 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \ >>> - { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \ >>> - { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \ >>> - { 0x00000000, 0x00000000, 0x3fc00000 }, /* FAKE_REGS */ \ >>> - { 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \ >>> + { 0x00000000, 0x00000000, 0x00600000 }, /* FFR_REGS */ \ >>> + { 0x00000000, 0x00000000, 0x006ffff0 }, /* PR_AND_FFR_REGS */ \ >>> + { 0x00000000, 0x00000000, 0x00100000 }, /* MOVEABLE_SYSREGS */ \ >>> + { 0x00000000, 0x00000000, 0x7f800000 }, /* FAKE_REGS */ \ >>> + { 0xffffffff, 0xffffffff, 0x001fffff } /* ALL_REGS */ \ >>> } >>> >>> #define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO) >>> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md >>> index ed29127dafb..ed1bd2ede7d 100644 >>> --- a/gcc/config/aarch64/aarch64.md >>> +++ b/gcc/config/aarch64/aarch64.md >>> @@ -107,10 +107,14 @@ (define_constants >>> (P14_REGNUM 82) >>> (P15_REGNUM 83) >>> (LAST_SAVED_REGNUM 83) >>> - (FFR_REGNUM 84) >>> + >>> + ;; Floating Point Mode Register, used in FP8 insns. >>> + (FPM_REGNUM 84) >>> + >>> + (FFR_REGNUM 85) >>> ;; "FFR token": a fake register used for representing the scheduling >>> ;; restrictions on FFR-related operations. >>> - (FFRT_REGNUM 85) >>> + (FFRT_REGNUM 86) >>> >>> ;; ---------------------------------------------------------------- >>> ;; Fake registers >>> @@ -122,17 +126,17 @@ (define_constants >>> ;; ABI-related lowering is needed. These placeholders read and >>> ;; write this register. Instructions that depend on the lowering >>> ;; read the register. >>> - (LOWERING_REGNUM 86) >>> + (LOWERING_REGNUM 87) >>> >>> ;; Represents the contents of the current function's TPIDR2 block, >>> ;; in abstract form. >>> - (TPIDR2_BLOCK_REGNUM 87) >>> + (TPIDR2_BLOCK_REGNUM 88) >>> >>> ;; Holds the value that the current function wants PSTATE.ZA to be. >>> ;; The actual value can sometimes vary, because it does not track >>> ;; changes to PSTATE.ZA that happen during a lazy save and restore. >>> ;; Those effects are instead tracked by ZA_SAVED_REGNUM. >>> - (SME_STATE_REGNUM 88) >>> + (SME_STATE_REGNUM 89) >>> >>> ;; Instructions write to this register if they set TPIDR2_EL0 to a >>> ;; well-defined value. Instructions read from the register if they >>> @@ -140,14 +144,14 @@ (define_constants >>> ;; >>> ;; The register does not model the architected TPIDR2_ELO, just the >>> ;; current function's management of it. >>> - (TPIDR2_SETUP_REGNUM 89) >>> + (TPIDR2_SETUP_REGNUM 90) >>> >>> ;; Represents the property "has an incoming lazy save been committed?". >>> - (ZA_FREE_REGNUM 90) >>> + (ZA_FREE_REGNUM 91) >>> >>> ;; Represents the property "are the current function's ZA contents >>> ;; stored in the lazy save buffer, rather than in ZA itself?". >>> - (ZA_SAVED_REGNUM 91) >>> + (ZA_SAVED_REGNUM 92) >>> >>> ;; Represents the contents of the current function's ZA state in >>> ;; abstract form. At various times in the function, these contents >>> @@ -155,10 +159,10 @@ (define_constants >>> ;; >>> ;; The contents persist even when the architected ZA is off. Private-ZA >>> ;; functions have no effect on its contents. >>> - (ZA_REGNUM 92) >>> + (ZA_REGNUM 93) >>> >>> ;; Similarly represents the contents of the current function's ZT0 >>> state. >>> - (ZT0_REGNUM 93) >>> + (ZT0_REGNUM 94) >>> >>> (FIRST_FAKE_REGNUM LOWERING_REGNUM) >>> (LAST_FAKE_REGNUM ZT0_REGNUM) >>> @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64" >>> [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1 >>> [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] >>> [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 >>> + [Umv, r ; mrs , * ] msr\t%0, %x1 >>> + [r, Umv ; mrs , * ] mrs\t%x0, %1 >>> } >>> ) >>> >>> @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64" >>> [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 >>> [w , w ; fmov , fp , 4] fmov\t%s0, %s1 >>> [w , Ds ; neon_move, simd, 4] << >>> aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); >>> + [Umv, r ; mrs , * , 4] msr\t%0, %x1 >>> + [r, Umv ; mrs , * , 4] mrs\t%x0, %1 >>> } >>> "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), >>> SImode) >>> && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" >>> @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64" >>> [w, w ; fmov , fp , 4] fmov\t%d0, %d1 >>> [w, Dd ; neon_move, simd, 4] << >>> aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); >>> [w, Dx ; neon_move, simd, 8] # >>> + [Umv, r; mrs , * , 4] msr\t%0, %1 >>> + [r, Umv; mrs , * , 4] mrs\t%0, %1 >>> } >>> "CONST_INT_P (operands[1]) >>> && REG_P (operands[0]) >>> diff --git a/gcc/config/aarch64/constraints.md >>> b/gcc/config/aarch64/constraints.md >>> index a2569cea510..0c81fb28f7e 100644 >>> --- a/gcc/config/aarch64/constraints.md >>> +++ b/gcc/config/aarch64/constraints.md >>> @@ -77,6 +77,9 @@ (define_register_constraint "Upl" "PR_LO_REGS" >>> (define_register_constraint "Uph" "PR_HI_REGS" >>> "SVE predicate registers p8 - p15.") >>> >>> +(define_register_constraint "Umv" "MOVEABLE_SYSREGS" >>> + "@internal System Registers suitable for moving rather than requiring an >>> unspec msr") >>> + >>> (define_constraint "c" >>> "@internal The condition code register." >>> (match_operand 0 "cc_register")) >>> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >>> b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >>> index 459442be155..afb44f83f60 100644 >>> --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >>> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >>> @@ -1,6 +1,7 @@ >>> /* Test the fp8 ACLE intrinsics family. */ >>> /* { dg-do compile } */ >>> /* { dg-options "-O1 -march=armv8-a" } */ >>> +/* { dg-final { check-function-bodies "**" "" "" } } */ >>> >>> #include <arm_acle.h> >>> >>> @@ -17,4 +18,104 @@ >>> #error "__ARM_FEATURE_FP8 feature macro defined." >>> #endif >>> >>> +/* >>> +**test_write_fpmr_sysreg_asm_64: >>> +** msr fpmr, x0 >>> +** ret >>> +*/ >>> +void >>> +test_write_fpmr_sysreg_asm_64 (uint64_t val) >>> +{ >>> + register uint64_t fpmr asm ("fpmr") = val; >>> + asm volatile ("" ::"Umv"(fpmr)); >>> +} >>> + >>> +/* >>> +**test_write_fpmr_sysreg_asm_32: >>> +** msr fpmr, x0 >>> +** ret >>> +*/ >>> +void >>> +test_write_fpmr_sysreg_asm_32 (uint32_t val) >>> +{ >>> + register uint32_t fpmr asm ("fpmr") = val; >>> + asm volatile ("" ::"Umv"(fpmr)); >>> +} >>> + >>> +/* >>> +**test_write_fpmr_sysreg_asm_16: >>> +** msr fpmr, x0 >>> +** ret >>> +*/ >>> +void >>> +test_write_fpmr_sysreg_asm_16 (uint16_t val) >>> +{ >>> + register uint16_t fpmr asm ("fpmr") = val; >>> + asm volatile ("" ::"Umv"(fpmr)); >>> +} >>> + >>> +/* >>> +**test_write_fpmr_sysreg_asm_8: >>> +** msr fpmr, x0 >>> +** ret >>> +*/ >>> +void >>> +test_write_fpmr_sysreg_asm_8 (uint8_t val) >>> +{ >>> + register uint8_t fpmr asm ("fpmr") = val; >>> + asm volatile ("" ::"Umv"(fpmr)); >>> +} >>> + >>> +/* >>> +**test_read_fpmr_sysreg_asm_64: >>> +** mrs x0, fpmr >>> +** ret >>> +*/ >>> +uint64_t >>> +test_read_fpmr_sysreg_asm_64 () >>> +{ >>> + register uint64_t fpmr asm ("fpmr"); >>> + asm volatile ("" : "=Umv"(fpmr) :); >>> + return fpmr; >>> +} >>> + >>> +/* >>> +**test_read_fpmr_sysreg_asm_32: >>> +** mrs x0, fpmr >>> +** ret >>> +*/ >>> +uint32_t >>> +test_read_fpmr_sysreg_asm_32 () >>> +{ >>> + register uint32_t fpmr asm ("fpmr"); >>> + asm volatile ("" : "=Umv"(fpmr) :); >>> + return fpmr; >>> +} >>> + >>> +/* >>> +**test_read_fpmr_sysreg_asm_16: >>> +** mrs x0, fpmr >>> +** ret >>> +*/ >>> +uint16_t >>> +test_read_fpmr_sysreg_asm_16 () >>> +{ >>> + register uint16_t fpmr asm ("fpmr"); >>> + asm volatile ("" : "=Umv"(fpmr) :); >>> + return fpmr; >>> +} >>> + >>> +/* >>> +**test_read_fpmr_sysreg_asm_8: >>> +** mrs x0, fpmr >>> +** ret >>> +*/ >>> +uint8_t >>> +test_read_fpmr_sysreg_asm_8 () >>> +{ >>> + register uint8_t fpmr asm ("fpmr"); >>> + asm volatile ("" : "=Umv"(fpmr) :); >>> + return fpmr; >>> +} >>> + >>> #pragma GCC pop_options >>