On 29/07/2024 13:13, Richard Sandiford wrote: > Claudio Bantaloukas <claudio.bantalou...@arm.com> writes: >> Unlike most system registers, fpmr can be heavily written to in code that >> exercises the fp8 functionality. That is because every fp8 instrinsic call >> can potentially change the value of fpmr. >> Rather than just use a an unspec, we treat the fpmr system register like > > Typo: s/a an/an/ Thanks for the catch, will repost along with the requested changes below
Cheers, Claudio > >> all other registers and use a move operation to read and write to it. >> >> We introduce a new class of moveable system registers that, currently, >> only accepts fpmr and a new constraint, Umv, that allows us to >> selectively use mrs and msr instructions when expanding rtl for them. >> Given that there is code that depends on "real" registers coming before >> "fake" ones, we introduce a new constant FPM_REGNUM that uses an >> existing value and renumber registers below that. >> This requires us to update the bitmaps that describe which registers >> belong to each register class. >> >> gcc/ChangeLog: >> >> * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add >> support for MOVEABLE_SYSREGS class. >> (aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr. >> (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class. >> (aarch64_class_max_nregs): Likewise. >> * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr. >> (CALL_REALLY_USED_REGISTERS): Likewise. >> (REGISTER_NAMES): Likewise. >> (enum reg_class): Add MOVEABLE_SYSREGS class. >> (REG_CLASS_NAMES): Likewise. >> (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr, >> the new MOVEABLE_REGS class and renumbering of registers. >> * config/aarch64/aarch64.md: (FPM_REGNUM): added new register >> number, reusing old value. >> (FFR_REGNUM): Renumber. >> (FFRT_REGNUM): Likewise. >> (LOWERING_REGNUM): Likewise. >> (TPIDR2_BLOCK_REGNUM): Likewise. >> (SME_STATE_REGNUM): Likewise. >> (TPIDR2_SETUP_REGNUM): Likewise. >> (ZA_FREE_REGNUM): Likewise. >> (ZA_SAVED_REGNUM): Likewise. >> (ZA_REGNUM): Likewise. >> (ZT0_REGNUM): Likewise. >> (*mov<mode>_aarch64): Add support for moveable sysregs. >> (*movsi_aarch64): Likewise. >> (*movdi_aarch64): Likewise. >> * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/aarch64/acle/fp8.c: New tests. >> [...] >> @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64" >> [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1 >> [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] >> [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 >> + [Umv, r ; mrs , * ] msr\t%0, %x1 >> + [r, Umv ; mrs , * ] mrs\t%x0, %1 >> } >> ) >> >> @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64" >> [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 >> [w , w ; fmov , fp , 4] fmov\t%s0, %s1 >> [w , Ds ; neon_move, simd, 4] << >> aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); >> + [Umv, r ; mrs , * , 8] msr\t%0, %x1 >> + [r, Umv ; mrs , * , 8] mrs\t%x0, %1 > > The lengths should be 4 rather than 8. > >> } >> "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), >> SImode) >> && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" >> @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64" >> [w, w ; fmov , fp , 4] fmov\t%d0, %d1 >> [w, Dd ; neon_move, simd, 4] << >> aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); >> [w, Dx ; neon_move, simd, 8] # >> + [Umv, r; mrs , * , 8] msr\t%0, %1 >> + [r, Umv; mrs , * , 8] mrs\t%0, %1 > > Similarly here. > >> } >> "CONST_INT_P (operands[1]) >> && REG_P (operands[0]) >> [...] >> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> index 459442be155..1a5c3d7e8fd 100644 >> --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c >> @@ -1,6 +1,7 @@ >> /* Test the fp8 ACLE intrinsics family. */ >> /* { dg-do compile } */ >> /* { dg-options "-O1 -march=armv8-a" } */ >> +/* { dg-final { check-function-bodies "**" "" "" } } */ >> >> #include <arm_acle.h> >> >> @@ -17,4 +18,107 @@ >> #error "__ARM_FEATURE_FP8 feature macro defined." >> #endif >> >> +/* >> +**test_write_fpmr_sysreg_asm_64: >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_64 (uint64_t val) >> +{ >> + register uint64_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_write_fpmr_sysreg_asm_32: >> +** uxtw x0, w0 >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_32 (uint32_t val) >> +{ >> + register uint64_t fpmr asm ("fpmr") = val; > > By using uint64_t rather than uint32_t, these tests are testing movdi > rather than the smaller move patterns. I think it should be uint32_t > instead. We should then have just an MSR, without an extension. > > Similarly for the 16-bit and 8-bit cases. > > LGTM with those changes, but please give others a day or so to comment. > > Thanks, > Richard > > >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_write_fpmr_sysreg_asm_16: >> +** and x0, x0, 65535 >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_16 (uint16_t val) >> +{ >> + register uint64_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_write_fpmr_sysreg_asm_8: >> +** and x0, x0, 255 >> +** msr fpmr, x0 >> +** ret >> +*/ >> +void >> +test_write_fpmr_sysreg_asm_8 (uint8_t val) >> +{ >> + register uint64_t fpmr asm ("fpmr") = val; >> + asm volatile ("" ::"Umv"(fpmr)); >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_64: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint64_t >> +test_read_fpmr_sysreg_asm_64 () >> +{ >> + register uint64_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_32: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint32_t >> +test_read_fpmr_sysreg_asm_32 () >> +{ >> + register uint32_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_16: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint16_t >> +test_read_fpmr_sysreg_asm_16 () >> +{ >> + register uint16_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> +/* >> +**test_read_fpmr_sysreg_asm_8: >> +** mrs x0, fpmr >> +** ret >> +*/ >> +uint8_t >> +test_read_fpmr_sysreg_asm_8 () >> +{ >> + register uint8_t fpmr asm ("fpmr"); >> + asm volatile ("" : "=Umv"(fpmr) :); >> + return fpmr; >> +} >> + >> #pragma GCC pop_options