Hi Claudio, I've left a couple of small comments below.
On 22/07/2024 09:30, Claudio Bantaloukas wrote: > > Unlike most system registers, fpmr can be heavily written to in code that > exercises the fp8 functionality. That is because every fp8 instrinsic call > can potentially change the value of fpmr. > Rather than just use a an unspec, we treat the fpmr system register like > all other registers and use a move operation to read and write to it. > > We introduce a new class of moveable system registers that, currently, > only accepts fpmr and a new constraint, Umv, that allows us to > selectively use mrs and msr instructions when expanding rtl for them. > Given that there is code that depends on "real" registers coming before > "fake" ones, we introduce a new constant FPM_REGNUM that uses an > existing value and renumber registers below that. > This requires us to update the bitmaps that describe which registers > belong to each register class. > > gcc/ChangeLog: > > * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add > support for MOVEABLE_SYSREGS class. > (aarch64_hard_regno_mode_ok): Only allow 64 bit reads and writes > to fpmr. > (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class. > (aarch64_class_max_nregs): Likewise. > * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr. > (CALL_REALLY_USED_REGISTERS): Likewise. > (REGISTER_NAMES): Likewise. > (enum reg_class): Add MOVEABLE_SYSREGS class. > (REG_CLASS_NAMES): Likewise. > (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr, > the new MOVEABLE_REGS class and renumbering of registers. > * config/aarch64/aarch64.md: (FPM_REGNUM): added new register > number, reusing old value. > (FFR_REGNUM): Renumber. > (FFRT_REGNUM): Likewise. > (LOWERING_REGNUM): Likewise. > (TPIDR2_BLOCK_REGNUM): Likewise. > (SME_STATE_REGNUM): Likewise. > (TPIDR2_SETUP_REGNUM): Likewise. > (ZA_FREE_REGNUM): Likewise. > (ZA_SAVED_REGNUM): Likewise. > (ZA_REGNUM): Likewise. > (ZT0_REGNUM): Likewise. > (*mov<mode>_aarch64): Add support for moveable sysregs. > (*movsi_aarch64): Likewise. > (*movdi_aarch64): Likewise. > * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/acle/fp8.c: New tests. > --- > gcc/config/aarch64/aarch64.cc | 9 ++ > gcc/config/aarch64/aarch64.h | 14 ++- > gcc/config/aarch64/aarch64.md | 30 ++++-- > gcc/config/aarch64/constraints.md | 3 + > gcc/testsuite/gcc.target/aarch64/acle/fp8.c | 107 +++++++++++++++++++- > 5 files changed, 146 insertions(+), 17 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 0d9e80d85b2..fa526836c6a 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -2018,6 +2018,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode > mode) > case PR_HI_REGS: > return mode == VNx32BImode ? 2 : 1; > > + case MOVEABLE_SYSREGS: > case FFR_REGS: > case PR_AND_FFR_REGS: > case FAKE_REGS: > @@ -2045,6 +2046,10 @@ aarch64_hard_regno_mode_ok (unsigned regno, > machine_mode mode) > /* This must have the same size as _Unwind_Word. */ > return mode == DImode; > > + if (regno == FPM_REGNUM) > + /* This must have the same size as the FPMR register. */ > + return mode == QImode || mode == HImode || mode == SImode || mode == > DImode; I'm probably missing something here, but I can't seem to square the comment with the logic. These modes all have different sizes, so how can they all be the same size as the FPMR register? > + > unsigned int vec_flags = aarch64_classify_vector_mode (mode); > if (vec_flags == VEC_SVE_PRED) > return pr_or_ffr_regnum_p (regno); > @@ -12682,6 +12687,9 @@ aarch64_regno_regclass (unsigned regno) > if (PR_REGNUM_P (regno)) > return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS; > > + if (regno == FPM_REGNUM) > + return MOVEABLE_SYSREGS; > + > if (regno == FFR_REGNUM || regno == FFRT_REGNUM) > return FFR_REGS; > > @@ -13070,6 +13078,7 @@ aarch64_class_max_nregs (reg_class_t regclass, > machine_mode mode) > case PR_HI_REGS: > return mode == VNx32BImode ? 2 : 1; > > + case MOVEABLE_SYSREGS: > case STACK_REG: > case FFR_REGS: > case PR_AND_FFR_REGS: > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index 40793aab814..7385bdaa456 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -522,6 +522,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE > ATTRIBUTE_UNUSED > 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ > 0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \ > 0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \ > + 1, /* FPMR */ \ > 1, 1, /* FFR and FFRT */ \ > 1, 1, 1, 1, 1, 1, 1, 1 /* Fake registers */ \ > } > @@ -546,6 +547,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE > ATTRIBUTE_UNUSED > 1, 1, 1, 0, /* SFP, AP, CC, VG */ \ > 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ > 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ > + 1, /* FPMR */ \ > 1, 1, /* FFR and FFRT */ \ > 0, 0, 0, 0, 0, 0, 0, 0 /* Fake registers */ \ > } > @@ -563,6 +565,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE > ATTRIBUTE_UNUSED > "sfp", "ap", "cc", "vg", \ > "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \ > "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \ > + "fpmr", \ > "ffr", "ffrt", \ > "lowering", "tpidr2_block", "sme_state", "tpidr2_setup", \ > "za_free", "za_saved", "za", "zt0" \ > @@ -774,6 +777,7 @@ enum reg_class > PR_REGS, > FFR_REGS, > PR_AND_FFR_REGS, > + MOVEABLE_SYSREGS, > FAKE_REGS, > ALL_REGS, > LIM_REG_CLASSES /* Last */ > @@ -800,6 +804,7 @@ enum reg_class > "PR_REGS", \ > "FFR_REGS", \ > "PR_AND_FFR_REGS", \ > + "MOVEABLE_SYSREGS", \ > "FAKE_REGS", \ > "ALL_REGS" \ > } > @@ -821,10 +826,11 @@ enum reg_class > { 0x00000000, 0x00000000, 0x00000ff0 }, /* PR_LO_REGS */ \ > { 0x00000000, 0x00000000, 0x000ff000 }, /* PR_HI_REGS */ \ > { 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \ > - { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \ > - { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \ > - { 0x00000000, 0x00000000, 0x3fc00000 }, /* FAKE_REGS */ \ > - { 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \ > + { 0x00000000, 0x00000000, 0x00600000 }, /* FFR_REGS */ \ > + { 0x00000000, 0x00000000, 0x006ffff0 }, /* PR_AND_FFR_REGS */ \ > + { 0x00000000, 0x00000000, 0x00100000 }, /* MOVEABLE_SYSREGS */ \ > + { 0x00000000, 0x00000000, 0x7f800000 }, /* FAKE_REGS */ \ > + { 0xffffffff, 0xffffffff, 0x001fffff } /* ALL_REGS */ \ > } > > #define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO) > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 94ff0eefa77..b654c0b9bb8 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -107,10 +107,14 @@ (define_constants > (P14_REGNUM 82) > (P15_REGNUM 83) > (LAST_SAVED_REGNUM 83) > - (FFR_REGNUM 84) > + > + ;; Floating Point Mode Register, used in FP8 insns. > + (FPM_REGNUM 84) > + > + (FFR_REGNUM 85) > ;; "FFR token": a fake register used for representing the scheduling > ;; restrictions on FFR-related operations. > - (FFRT_REGNUM 85) > + (FFRT_REGNUM 86) > > ;; ---------------------------------------------------------------- > ;; Fake registers > @@ -122,17 +126,17 @@ (define_constants > ;; ABI-related lowering is needed. These placeholders read and > ;; write this register. Instructions that depend on the lowering > ;; read the register. > - (LOWERING_REGNUM 86) > + (LOWERING_REGNUM 87) > > ;; Represents the contents of the current function's TPIDR2 block, > ;; in abstract form. > - (TPIDR2_BLOCK_REGNUM 87) > + (TPIDR2_BLOCK_REGNUM 88) > > ;; Holds the value that the current function wants PSTATE.ZA to be. > ;; The actual value can sometimes vary, because it does not track > ;; changes to PSTATE.ZA that happen during a lazy save and restore. > ;; Those effects are instead tracked by ZA_SAVED_REGNUM. > - (SME_STATE_REGNUM 88) > + (SME_STATE_REGNUM 89) > > ;; Instructions write to this register if they set TPIDR2_EL0 to a > ;; well-defined value. Instructions read from the register if they > @@ -140,14 +144,14 @@ (define_constants > ;; > ;; The register does not model the architected TPIDR2_ELO, just the > ;; current function's management of it. > - (TPIDR2_SETUP_REGNUM 89) > + (TPIDR2_SETUP_REGNUM 90) > > ;; Represents the property "has an incoming lazy save been committed?". > - (ZA_FREE_REGNUM 90) > + (ZA_FREE_REGNUM 91) > > ;; Represents the property "are the current function's ZA contents > ;; stored in the lazy save buffer, rather than in ZA itself?". > - (ZA_SAVED_REGNUM 91) > + (ZA_SAVED_REGNUM 92) > > ;; Represents the contents of the current function's ZA state in > ;; abstract form. At various times in the function, these contents > @@ -155,10 +159,10 @@ (define_constants > ;; > ;; The contents persist even when the architected ZA is off. Private-ZA > ;; functions have no effect on its contents. > - (ZA_REGNUM 92) > + (ZA_REGNUM 93) > > ;; Similarly represents the contents of the current function's ZT0 state. > - (ZT0_REGNUM 93) > + (ZT0_REGNUM 94) > > (FIRST_FAKE_REGNUM LOWERING_REGNUM) > (LAST_FAKE_REGNUM ZT0_REGNUM) > @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64" > [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1 > [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] > [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 > + [Umv, r ; mrs , * ] msr\t%0, %x1 > + [r, Umv ; mrs , * ] mrs\t%x0, %1 > } > ) > > @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64" > [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 > [w , w ; fmov , fp , 4] fmov\t%s0, %s1 > [w , Ds ; neon_move, simd, 4] << > aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); > + [Umv, r ; mrs , * , 8] msr\t%0, %x1 > + [r, Umv ; mrs , * , 8] mrs\t%x0, %1 > } > "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), > SImode) > && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" > @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64" > [w, w ; fmov , fp , 4] fmov\t%d0, %d1 > [w, Dd ; neon_move, simd, 4] << > aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); > [w, Dx ; neon_move, simd, 8] # > + [Umv, r; mrs , * , 8] msr\t%0, %x1 > + [r, Umv; mrs , * , 8] mrs\t%x0, %1 I think in the case of this pattern (but not the others) the %x modifier isn't needed since a DImode operand satisfying "r" should get printed as an x register by default. Thanks, Alex > } > "CONST_INT_P (operands[1]) > && REG_P (operands[0]) > diff --git a/gcc/config/aarch64/constraints.md > b/gcc/config/aarch64/constraints.md > index a2569cea510..0c81fb28f7e 100644 > --- a/gcc/config/aarch64/constraints.md > +++ b/gcc/config/aarch64/constraints.md > @@ -77,6 +77,9 @@ (define_register_constraint "Upl" "PR_LO_REGS" > (define_register_constraint "Uph" "PR_HI_REGS" > "SVE predicate registers p8 - p15.") > > +(define_register_constraint "Umv" "MOVEABLE_SYSREGS" > + "@internal System Registers suitable for moving rather than requiring an > unspec msr") > + > (define_constraint "c" > "@internal The condition code register." > (match_operand 0 "cc_register")) > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > index b774f28c9f0..10fd128d8f9 100644 > --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > @@ -1,14 +1,14 @@ > /* Test the fp8 ACLE intrinsics family. */ > /* { dg-do compile } */ > /* { dg-options "-O1 -march=armv8-a" } */ > -/* { dg-final { check-function-bodies "**" "" } } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > > #ifdef __ARM_FEATURE_FP8 > #error "__ARM_FEATURE_FP8 feature macro defined." > #endif > > #pragma GCC push_options > -#pragma GCC target ("arch=armv9.4-a+fp8") > +#pragma GCC target("arch=armv9.4-a+fp8") > > #include <arm_acle.h> > > @@ -16,4 +16,105 @@ > #error "__ARM_FEATURE_FP8 feature macro not defined." > #endif > > -#pragma GCC pop_options > +/* > +**test_write_fpmr_sysreg_asm_64: > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_64 (uint64_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_write_fpmr_sysreg_asm_32: > +** uxtw x0, w0 > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_32 (uint32_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_write_fpmr_sysreg_asm_16: > +** and x0, x0, 65535 > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_16 (uint16_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_write_fpmr_sysreg_asm_8: > +** and x0, x0, 255 > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_8 (uint8_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_read_fpmr_sysreg_asm_64: > +** mrs x0, fpmr > +** ret > +*/ > +uint64_t > +test_read_fpmr_sysreg_asm_64 () > +{ > + register uint64_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > +/* > +**test_read_fpmr_sysreg_asm_32: > +** mrs x0, fpmr > +** ret > +*/ > +uint32_t > +test_read_fpmr_sysreg_asm_32 () > +{ > + register uint32_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > +/* > +**test_read_fpmr_sysreg_asm_16: > +** mrs x0, fpmr > +** ret > +*/ > +uint16_t > +test_read_fpmr_sysreg_asm_16 () > +{ > + register uint16_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > +/* > +**test_read_fpmr_sysreg_asm_8: > +** mrs x0, fpmr > +** ret > +*/ > +uint8_t > +test_read_fpmr_sysreg_asm_8 () > +{ > + register uint8_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +}