Claudio Bantaloukas <claudio.bantalou...@arm.com> writes: > Unlike most system registers, fpmr can be heavily written to in code that > exercises the fp8 functionality. That is because every fp8 instrinsic call > can potentially change the value of fpmr. > Rather than just use a an unspec, we treat the fpmr system register like
Typo: s/a an/an/ > all other registers and use a move operation to read and write to it. > > We introduce a new class of moveable system registers that, currently, > only accepts fpmr and a new constraint, Umv, that allows us to > selectively use mrs and msr instructions when expanding rtl for them. > Given that there is code that depends on "real" registers coming before > "fake" ones, we introduce a new constant FPM_REGNUM that uses an > existing value and renumber registers below that. > This requires us to update the bitmaps that describe which registers > belong to each register class. > > gcc/ChangeLog: > > * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add > support for MOVEABLE_SYSREGS class. > (aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr. > (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class. > (aarch64_class_max_nregs): Likewise. > * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr. > (CALL_REALLY_USED_REGISTERS): Likewise. > (REGISTER_NAMES): Likewise. > (enum reg_class): Add MOVEABLE_SYSREGS class. > (REG_CLASS_NAMES): Likewise. > (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr, > the new MOVEABLE_REGS class and renumbering of registers. > * config/aarch64/aarch64.md: (FPM_REGNUM): added new register > number, reusing old value. > (FFR_REGNUM): Renumber. > (FFRT_REGNUM): Likewise. > (LOWERING_REGNUM): Likewise. > (TPIDR2_BLOCK_REGNUM): Likewise. > (SME_STATE_REGNUM): Likewise. > (TPIDR2_SETUP_REGNUM): Likewise. > (ZA_FREE_REGNUM): Likewise. > (ZA_SAVED_REGNUM): Likewise. > (ZA_REGNUM): Likewise. > (ZT0_REGNUM): Likewise. > (*mov<mode>_aarch64): Add support for moveable sysregs. > (*movsi_aarch64): Likewise. > (*movdi_aarch64): Likewise. > * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/acle/fp8.c: New tests. > [...] > @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64" > [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1 > [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0] > [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 > + [Umv, r ; mrs , * ] msr\t%0, %x1 > + [r, Umv ; mrs , * ] mrs\t%x0, %1 > } > ) > > @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64" > [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 > [w , w ; fmov , fp , 4] fmov\t%s0, %s1 > [w , Ds ; neon_move, simd, 4] << > aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); > + [Umv, r ; mrs , * , 8] msr\t%0, %x1 > + [r, Umv ; mrs , * , 8] mrs\t%x0, %1 The lengths should be 4 rather than 8. > } > "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), > SImode) > && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" > @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64" > [w, w ; fmov , fp , 4] fmov\t%d0, %d1 > [w, Dd ; neon_move, simd, 4] << > aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); > [w, Dx ; neon_move, simd, 8] # > + [Umv, r; mrs , * , 8] msr\t%0, %1 > + [r, Umv; mrs , * , 8] mrs\t%0, %1 Similarly here. > } > "CONST_INT_P (operands[1]) > && REG_P (operands[0]) > [...] > diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > index 459442be155..1a5c3d7e8fd 100644 > --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c > @@ -1,6 +1,7 @@ > /* Test the fp8 ACLE intrinsics family. */ > /* { dg-do compile } */ > /* { dg-options "-O1 -march=armv8-a" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > > #include <arm_acle.h> > > @@ -17,4 +18,107 @@ > #error "__ARM_FEATURE_FP8 feature macro defined." > #endif > > +/* > +**test_write_fpmr_sysreg_asm_64: > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_64 (uint64_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_write_fpmr_sysreg_asm_32: > +** uxtw x0, w0 > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_32 (uint32_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; By using uint64_t rather than uint32_t, these tests are testing movdi rather than the smaller move patterns. I think it should be uint32_t instead. We should then have just an MSR, without an extension. Similarly for the 16-bit and 8-bit cases. LGTM with those changes, but please give others a day or so to comment. Thanks, Richard > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_write_fpmr_sysreg_asm_16: > +** and x0, x0, 65535 > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_16 (uint16_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_write_fpmr_sysreg_asm_8: > +** and x0, x0, 255 > +** msr fpmr, x0 > +** ret > +*/ > +void > +test_write_fpmr_sysreg_asm_8 (uint8_t val) > +{ > + register uint64_t fpmr asm ("fpmr") = val; > + asm volatile ("" ::"Umv"(fpmr)); > +} > + > +/* > +**test_read_fpmr_sysreg_asm_64: > +** mrs x0, fpmr > +** ret > +*/ > +uint64_t > +test_read_fpmr_sysreg_asm_64 () > +{ > + register uint64_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > +/* > +**test_read_fpmr_sysreg_asm_32: > +** mrs x0, fpmr > +** ret > +*/ > +uint32_t > +test_read_fpmr_sysreg_asm_32 () > +{ > + register uint32_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > +/* > +**test_read_fpmr_sysreg_asm_16: > +** mrs x0, fpmr > +** ret > +*/ > +uint16_t > +test_read_fpmr_sysreg_asm_16 () > +{ > + register uint16_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > +/* > +**test_read_fpmr_sysreg_asm_8: > +** mrs x0, fpmr > +** ret > +*/ > +uint8_t > +test_read_fpmr_sysreg_asm_8 () > +{ > + register uint8_t fpmr asm ("fpmr"); > + asm volatile ("" : "=Umv"(fpmr) :); > + return fpmr; > +} > + > #pragma GCC pop_options