On 31/07/2024 08:57, Kyrylo Tkachov wrote: > Hi Claudio, > >> On 31 Jul 2024, at 08:29, Claudio Bantaloukas <claudio.bantalou...@arm.com> >> wrote: >> >> External email: Use caution opening links or attachments >> >> >> Unlike most system registers, fpmr can be heavily written to in code that >> exercises the fp8 functionality. That is because every fp8 instrinsic call >> can potentially change the value of fpmr. >> Rather than just use an unspec, we treat the fpmr system register like >> all other registers and use a move operation to read and write to it. >> >> We introduce a new class of moveable system registers that, currently, >> only accepts fpmr and a new constraint, Umv, that allows us to >> selectively use mrs and msr instructions when expanding rtl for them. >> Given that there is code that depends on "real" registers coming before >> "fake" ones, we introduce a new constant FPM_REGNUM that uses an >> existing value and renumber registers below that. >> This requires us to update the bitmaps that describe which registers >> belong to each register class. > > So I like the approach though I’ll let Richard review the implementation > details here. > My only slight concern here is compatibility with LLVM. I notice that LLVM > doesn’t accept the test case you’ve included as it doesn’t understand “fpmr” > in its inline assembly. It also doesn’t support the new constraint, of course. > Do you know if there are plans to teach LLVM these inline assembly constructs > to avoid creating GCC-only sources for fp8?
Hi Kyrill, I asked and got confirmation that a patch to add fpmr as a register should land soon. Cheers, Claudio > Thanks, > Kyrill > > >> >> gcc/ChangeLog: >> >> * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add >> support for MOVEABLE_SYSREGS class. >> (aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr. >> (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class. >> (aarch64_class_max_nregs): Likewise. >> * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr. >> (CALL_REALLY_USED_REGISTERS): Likewise. >> (REGISTER_NAMES): Likewise. >> (enum reg_class): Add MOVEABLE_SYSREGS class. >> (REG_CLASS_NAMES): Likewise. >> (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr, >> the new MOVEABLE_REGS class and renumbering of registers. >> * config/aarch64/aarch64.md: (FPM_REGNUM): added new register >> number, reusing old value. >> (FFR_REGNUM): Renumber. >> (FFRT_REGNUM): Likewise. >> (LOWERING_REGNUM): Likewise. >> (TPIDR2_BLOCK_REGNUM): Likewise. >> (SME_STATE_REGNUM): Likewise. >> (TPIDR2_SETUP_REGNUM): Likewise. >> (ZA_FREE_REGNUM): Likewise. >> (ZA_SAVED_REGNUM): Likewise. >> (ZA_REGNUM): Likewise. >> (ZT0_REGNUM): Likewise. >> (*mov<mode>_aarch64): Add support for moveable sysregs. >> (*movsi_aarch64): Likewise. >> (*movdi_aarch64): Likewise. >> * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/aarch64/acle/fp8.c: New tests. >> --- >> gcc/config/aarch64/aarch64.cc | 8 ++ >> gcc/config/aarch64/aarch64.h | 14 ++- >> gcc/config/aarch64/aarch64.md | 30 ++++-- >> gcc/config/aarch64/constraints.md | 3 + >> gcc/testsuite/gcc.target/aarch64/acle/fp8.c | 101 ++++++++++++++++++++ >> 5 files changed, 142 insertions(+), 14 deletions(-) >> >> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc >> index e0cf382998c..9810f2c0390 100644 >> --- a/gcc/config/aarch64/aarch64.cc >> +++ b/gcc/config/aarch64/aarch64.cc >> @@ -2018,6 +2018,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode >> mode) >> case PR_HI_REGS: >> return mode == VNx32BImode ? 2 : 1; >> >> + case MOVEABLE_SYSREGS: >> case FFR_REGS: >> case PR_AND_FFR_REGS: >> case FAKE_REGS: >> @@ -2045,6 +2046,9 @@ aarch64_hard_regno_mode_ok (unsigned regno, >> machine_mode mode) >> /* This must have the same size as _Unwind_Word. */ >> return mode == DImode; >> >> + if (regno == FPM_REGNUM) >> + return mode == QImode || mode == HImode || mode == SImode || mode == >> DImode; >> + >> unsigned int vec_flags = aarch64_classify_vector_mode (mode); >> if (vec_flags == VEC_SVE_PRED) >> return pr_or_ffr_regnum_p (regno); >> @@ -12680,6 +12684,9 @@ aarch64_regno_regclass (unsigned regno) >> if (PR_REGNUM_P (regno)) >> return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS; >> >> + if (regno == FPM_REGNUM) >> + return MOVEABLE_SYSREGS; >> + >> if (regno == FFR_REGNUM || regno == FFRT_REGNUM) >> return FFR_REGS; >> >> @@ -13068,6 +13075,7 @@ aarch64_class_max_nregs (reg_class_t regclass, >> machine_mode mode) >> case PR_HI_REGS: >> return mode == VNx32BImode ? 2 : 1; >> >> + case MOVEABLE_SYSREGS: >> case STACK_REG: >> case FFR_REGS: >> case PR_AND_FFR_REGS: >> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h >> index 2e75c6b81e2..2dfb999bea5 100644 >> --- a/gcc/config/aarch64/aarch64.h >> +++ b/gcc/config/aarch64/aarch64.h >> @@ -523,6 +523,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE >> ATTRIBUTE_UNUSED >> 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ >> 0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \ >> 0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \ >> + 1, /* FPMR */ \ >> 1, 1, /* FFR and FFRT */ \ >> 1, 1, 1, 1, 1, 1, 1, 1 /* Fake registers */ \ >> } >> @@ -547,6 +548,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE >> ATTRIBUTE_UNUSED >> 1, 1, 1, 0, /* SFP, AP, CC, VG */ \ >> 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ >> 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ >> + 1, /* FPMR */ \ >> 1, 1, /* FFR and FFRT */ \ >> 0, 0, 0, 0, 0, 0, 0, 0 /* Fake registers */ \ >> } >> @@ -564,6 +566,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE >> ATTRIBUTE_UNUSED >> "sfp", "ap", "cc", "vg", \ >> "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \ >> "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \ >> + "fpmr", \ >> "ffr", "ffrt", \ >> "lowering", "tpidr2_block", "sme_state", "tpidr2_setup", \ >> "za_free", "za_saved", "za", "zt0" \ >> @@ -775,6 +778,7 @@ enum reg_class >> PR_REGS, >> FFR_REGS, >> PR_AND_FFR_REGS, >> + MOVEABLE_SYSREGS, >> FAKE_REGS, >> ALL_REGS, >> LIM_REG_CLASSES /* Last */ >> @@ -801,6 +805,7 @@ enum reg_class >> "PR_REGS", \ >> "FFR_REGS", \ >> "PR_AND_FFR_REGS", \ >> + "MOVEABLE_SYSREGS", \ >> "FAKE_REGS", \ >> "ALL_REGS" \ >> } >> @@ -822,10 +827,11 @@ enum reg_class >> { 0x00000000, 0x00000000, 0x00000ff0 }, /* PR_LO_REGS */ \ >> { 0x00000000, 0x00000000, 0x000ff000 }, /* PR_HI_REGS */ \ >> { 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \ >> - { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \ >> - { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \ >> - { 0x00000000, 0x00000000, 0x3fc00000 }, /* FAKE_REGS */ \ >> - { 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \ >> + { 0x00000000, 0x00000000, 0x00600000 }, /* FFR_REGS */ \ >> + { 0x00000000, 0x00000000, 0x006ffff0 }, /* PR_AND_FFR_REGS */ \ >> + { 0x00000000, 0x00000000, 0x00100000 }, /* MOVEABLE_SYSREGS */ \ >> + { 0x00000000, 0x00000000, 0x7f800000 }, /* FAKE_REGS */ \ >> + { 0xffffffff, 0xffffffff, 0x001fffff } /* ALL_REGS */ \ >> } >> >> #define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO) >> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md >> index ed29127dafb..ed1bd2ede7d 100644 >> --- a/gcc/config/aarch64/aarch64.md >> +++ b/gcc/config/aarch64/aarch64.md >> @@ -107,10 +107,14 @@ (define_constants >> (P14_REGNUM 82) >> (P15_REGNUM 83) >> (LAST_SAVED_REGNUM 83) >> - (FFR_REGNUM 84) >> + >> + ;; Floating Point Mode Register, used in FP8 insns. >> + (FPM_REGNUM 84) >> + >> + (FFR_REGNUM 85) >> ;; "FFR token": a fake register used for representing the scheduling >> ;; restrictions on FFR-related operations. >> - (FFRT_REGNUM 85) >> + (FFRT_REGNUM 86) >> >> ;; ---------------------------------------------------------------- >> ;; Fake registers >> @@ -122,17 +126,17 @@ (define_constants >> ;; ABI-related lowering is needed. These placeholders read and >> ;; write this register. Instructions that depend on the lowering >> ;; read the register. >> - (LOWERING_REGNUM 86) >> + (LOWERING_REGNUM 87) >> >> ;; Represents the contents of the current function's TPIDR2 block, >> ;; in abstract form. >> - (TPIDR2_BLOCK_REGNUM 87) >> + (TPIDR2_BLOCK_REGNUM 88) >> >> ;; Holds the value that the current function wants PSTATE.ZA to be. >> ;; The actual value can sometimes vary, because it does not track >> ;; changes to PSTATE.ZA that happen during a lazy save and restore. >> ;; Those effects are instead tracked by ZA_SAVED_REGNUM. >> - (SME_STATE_REGNUM 88) >> + (SME_STATE_REGNUM 89) >> >> ;; Instructions write to this register if they set TPIDR2_EL0 to a >> ;; well-defined value. Instructions read from the register if they >> @@ -140,14 +144,14 @@ (define_constants >> ;; >> ;; The register does not model the architected TPIDR2_ELO, just the >> ;; current function's management of it. >> - (TPIDR2_SETUP_REGNUM 89) >> + (TPIDR2_SETUP_REGNUM 90) >> >> ;; Represents the property "has an incoming lazy save been committed?". >> - (ZA_FREE_REGNUM 90) >> + (ZA_FREE_REGNUM 91) >> >> ;; Represents the property "are the current function's ZA contents >> ;; stored in the lazy save buffer, rather than in ZA itself?". >> - (ZA_SAVED_REGNUM 91) >> + (ZA_SAVED_REGNUM 92) >> >> ;; Represents the contents of the current function's ZA state in >> ;; abstract form. At various times in the function, these contents >> @@ -155,10 +159,10 @@ (define_constants >> ;; >> ;; The contents persist even when the architected ZA is off. Private-ZA >> ;; functions have no effect on its contents. >> - (ZA_REGNUM 92) >> + (ZA_REGNUM 93) >> >> ;; Similarly represents the contents of the current function's ZT0 >> state. >> - (ZT0_REGNUM 93) >> + (ZT0_REGNUM 94) >> >> (FIRST_FAKE_REGNUM LOWERING_REGNUM) >> (LAST_FAKE_REGNUM ZT0_REGNUM) >> @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64" >> [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1 >> [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] >> [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 >> + [Umv, r ; mrs , * ] msr\t%0, %x1 >> + [r, Umv ; mrs , * ] mrs\t%x0, %1 >> } >> ) >> >> @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64" >> [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 >> [w , w ; fmov , fp , 4] fmov\t%s0, %s1 >> [w , Ds ; neon_move, simd, 4] << >> aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); >> + [Umv, r ; mrs , * , 4] msr\t%0, %x1 >> + [r, Umv ; mrs , * , 4] mrs\t%x0, %1 >> } >> "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), >> SImode) >> && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" >> @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64" >> [w, w ; fmov , fp , 4] fmov\t%d0, %d1 >> [w, Dd ; neon_move, simd, 4] << >> aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); >> [w, Dx ; neon_move, simd, 8] # >> + [Umv, r; mrs , * , 4] msr\t%0, %1 >> + [r, Umv; mrs , * , 4] mrs\t%0, %1 >> } >> "CONST_INT_P (operands[1]) >> && REG_P (operands[0]) >> diff --git a/gcc/config/aarch64/constraints.md >> b/gcc/config/aarch64/constraints.md >> index a2569cea510..0c81fb28f7e 100644 >> --- a/gcc/config/aarch64/constraints.md >> +++ b/gcc/config/aarch64/constraints.md >> @@ -77,6 +77,9 @@ (define_register_constraint "Upl" "PR_LO_REGS" >> (define_register_constraint "Uph" "PR_HI_REGS" >> "SVE predicate registers p8 - p15.") >> >> +(define_register_constraint "Umv" "MOVEABLE_SYSREGS" >> + "@internal System Registers suitable for moving rather than requiring an >> unspec msr") >> + >> (define_constraint "c" >> "@internal The condition code register." >> (match_operand 0 "cc_register")) >> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> index 459442be155..afb44f83f60 100644 >> --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> @@ -1,6 +1,7 @@ >> /* Test the fp8 ACLE intrinsics family. */ >> /* { dg-do compile } */ >> /* { dg-options "-O1 -march=armv8-a" } */ >> +/* { dg-final { check-function-bodies "**" "" "" } } */ >> >> #include <arm_acle.h> >> >> @@ -17,4 +18,104 @@ >> #error "__ARM_FEATURE_FP8 feature macro defined." >> #endif >> >> +/* >> +**test_write_fpmr_sysreg_asm_64: >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_64 (uint64_t val) >> +{ >> + register uint64_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_write_fpmr_sysreg_asm_32: >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_32 (uint32_t val) >> +{ >> + register uint32_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_write_fpmr_sysreg_asm_16: >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_16 (uint16_t val) >> +{ >> + register uint16_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_write_fpmr_sysreg_asm_8: >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_8 (uint8_t val) >> +{ >> + register uint8_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_64: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint64_t >> +test_read_fpmr_sysreg_asm_64 () >> +{ >> + register uint64_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_32: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint32_t >> +test_read_fpmr_sysreg_asm_32 () >> +{ >> + register uint32_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_16: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint16_t >> +test_read_fpmr_sysreg_asm_16 () >> +{ >> + register uint16_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_8: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint8_t >> +test_read_fpmr_sysreg_asm_8 () >> +{ >> + register uint8_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> #pragma GCC pop_options >