On 29/07/2024 13:13, Richard Sandiford wrote:
> Claudio Bantaloukas <claudio.bantalou...@arm.com> writes:
>> Unlike most system registers, fpmr can be heavily written to in code that
>> exercises the fp8 functionality. That is because every fp8 instrinsic call
>> can potentially change the value of fpmr.
>> Rather than just use a an unspec, we treat the fpmr system register like
> 
> Typo: s/a an/an/
Thanks for the catch, will repost along with the requested changes below

Cheers,
Claudio

> 
>> all other registers and use a move operation to read and write to it.
>>
>> We introduce a new class of moveable system registers that, currently,
>> only accepts fpmr and a new constraint, Umv, that allows us to
>> selectively use mrs and msr instructions when expanding rtl for them.
>> Given that there is code that depends on "real" registers coming before
>> "fake" ones, we introduce a new constant FPM_REGNUM that uses an
>> existing value and renumber registers below that.
>> This requires us to update the bitmaps that describe which registers
>> belong to each register class.
>>
>> gcc/ChangeLog:
>>
>>      * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add
>>      support for MOVEABLE_SYSREGS class.
>>      (aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr.
>>      (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class.
>>      (aarch64_class_max_nregs): Likewise.
>>      * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr.
>>      (CALL_REALLY_USED_REGISTERS): Likewise.
>>      (REGISTER_NAMES): Likewise.
>>      (enum reg_class): Add MOVEABLE_SYSREGS class.
>>      (REG_CLASS_NAMES): Likewise.
>>      (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr,
>>      the new MOVEABLE_REGS class and renumbering of registers.
>>      * config/aarch64/aarch64.md: (FPM_REGNUM): added new register
>>      number, reusing old value.
>>      (FFR_REGNUM): Renumber.
>>      (FFRT_REGNUM): Likewise.
>>      (LOWERING_REGNUM): Likewise.
>>      (TPIDR2_BLOCK_REGNUM): Likewise.
>>      (SME_STATE_REGNUM): Likewise.
>>      (TPIDR2_SETUP_REGNUM): Likewise.
>>      (ZA_FREE_REGNUM): Likewise.
>>      (ZA_SAVED_REGNUM): Likewise.
>>      (ZA_REGNUM): Likewise.
>>      (ZT0_REGNUM): Likewise.
>>      (*mov<mode>_aarch64): Add support for moveable sysregs.
>>      (*movsi_aarch64): Likewise.
>>      (*movdi_aarch64): Likewise.
>>      * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint.
>>
>> gcc/testsuite/ChangeLog:
>>
>>      * gcc.target/aarch64/acle/fp8.c: New tests.
>> [...]
>> @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64"
>>        [w, r Z  ; neon_from_gp<q>, nosimd     ] fmov\t%s0, %w1
>>        [w, w    ; neon_dup       , simd       ] dup\t%<Vetype>0, %1.<v>[0]
>>        [w, w    ; neon_dup       , nosimd     ] fmov\t%s0, %s1
>> +     [Umv, r  ; mrs            , *          ] msr\t%0, %x1
>> +     [r, Umv  ; mrs            , *          ] mrs\t%x0, %1
>>     }
>>   )
>>   
>> @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64"
>>        [r  , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
>>        [w  , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
>>        [w  , Ds ; neon_move, simd, 4] << 
>> aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
>> +     [Umv, r  ; mrs      , *   , 8] msr\t%0, %x1
>> +     [r, Umv  ; mrs      , *   , 8] mrs\t%x0, %1
> 
> The lengths should be 4 rather than 8.
> 
>>     }
>>     "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), 
>> SImode)
>>       && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>> @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64"
>>        [w, w  ; fmov     , fp  , 4] fmov\t%d0, %d1
>>        [w, Dd ; neon_move, simd, 4] << 
>> aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
>>        [w, Dx ; neon_move, simd, 8] #
>> +     [Umv, r; mrs      , *   , 8] msr\t%0, %1
>> +     [r, Umv; mrs      , *   , 8] mrs\t%0, %1
> 
> Similarly here.
> 
>>     }
>>     "CONST_INT_P (operands[1])
>>      && REG_P (operands[0])
>> [...]
>> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c 
>> b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
>> index 459442be155..1a5c3d7e8fd 100644
>> --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
>> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
>> @@ -1,6 +1,7 @@
>>   /* Test the fp8 ACLE intrinsics family.  */
>>   /* { dg-do compile } */
>>   /* { dg-options "-O1 -march=armv8-a" } */
>> +/* { dg-final { check-function-bodies "**" "" "" } } */
>>   
>>   #include <arm_acle.h>
>>   
>> @@ -17,4 +18,107 @@
>>   #error "__ARM_FEATURE_FP8 feature macro defined."
>>   #endif
>>   
>> +/*
>> +**test_write_fpmr_sysreg_asm_64:
>> +**  msr     fpmr, x0
>> +**  ret
>> +*/
>> +void
>> +test_write_fpmr_sysreg_asm_64 (uint64_t val)
>> +{
>> +  register uint64_t fpmr asm ("fpmr") = val;
>> +  asm volatile ("" ::"Umv"(fpmr));
>> +}
>> +
>> +/*
>> +**test_write_fpmr_sysreg_asm_32:
>> +**  uxtw    x0, w0
>> +**  msr     fpmr, x0
>> +**  ret
>> +*/
>> +void
>> +test_write_fpmr_sysreg_asm_32 (uint32_t val)
>> +{
>> +  register uint64_t fpmr asm ("fpmr") = val;
> 
> By using uint64_t rather than uint32_t, these tests are testing movdi
> rather than the smaller move patterns.  I think it should be uint32_t
> instead.  We should then have just an MSR, without an extension.
> 
> Similarly for the 16-bit and 8-bit cases.
> 
> LGTM with those changes, but please give others a day or so to comment.
> 
> Thanks,
> Richard
> 
> 
>> +  asm volatile ("" ::"Umv"(fpmr));
>> +}
>> +
>> +/*
>> +**test_write_fpmr_sysreg_asm_16:
>> +**  and     x0, x0, 65535
>> +**  msr     fpmr, x0
>> +**  ret
>> +*/
>> +void
>> +test_write_fpmr_sysreg_asm_16 (uint16_t val)
>> +{
>> +  register uint64_t fpmr asm ("fpmr") = val;
>> +  asm volatile ("" ::"Umv"(fpmr));
>> +}
>> +
>> +/*
>> +**test_write_fpmr_sysreg_asm_8:
>> +**  and     x0, x0, 255
>> +**  msr     fpmr, x0
>> +**  ret
>> +*/
>> +void
>> +test_write_fpmr_sysreg_asm_8 (uint8_t val)
>> +{
>> +  register uint64_t fpmr asm ("fpmr") = val;
>> +  asm volatile ("" ::"Umv"(fpmr));
>> +}
>> +
>> +/*
>> +**test_read_fpmr_sysreg_asm_64:
>> +**  mrs     x0, fpmr
>> +**  ret
>> +*/
>> +uint64_t
>> +test_read_fpmr_sysreg_asm_64 ()
>> +{
>> +  register uint64_t fpmr asm ("fpmr");
>> +  asm volatile ("" : "=Umv"(fpmr) :);
>> +  return fpmr;
>> +}
>> +
>> +/*
>> +**test_read_fpmr_sysreg_asm_32:
>> +**  mrs     x0, fpmr
>> +**  ret
>> +*/
>> +uint32_t
>> +test_read_fpmr_sysreg_asm_32 ()
>> +{
>> +  register uint32_t fpmr asm ("fpmr");
>> +  asm volatile ("" : "=Umv"(fpmr) :);
>> +  return fpmr;
>> +}
>> +
>> +/*
>> +**test_read_fpmr_sysreg_asm_16:
>> +**  mrs     x0, fpmr
>> +**  ret
>> +*/
>> +uint16_t
>> +test_read_fpmr_sysreg_asm_16 ()
>> +{
>> +  register uint16_t fpmr asm ("fpmr");
>> +  asm volatile ("" : "=Umv"(fpmr) :);
>> +  return fpmr;
>> +}
>> +
>> +/*
>> +**test_read_fpmr_sysreg_asm_8:
>> +**  mrs     x0, fpmr
>> +**  ret
>> +*/
>> +uint8_t
>> +test_read_fpmr_sysreg_asm_8 ()
>> +{
>> +  register uint8_t fpmr asm ("fpmr");
>> +  asm volatile ("" : "=Umv"(fpmr) :);
>> +  return fpmr;
>> +}
>> +
>>   #pragma GCC pop_options

Reply via email to