Claudio Bantaloukas <claudio.bantalou...@arm.com> writes:
> Unlike most system registers, fpmr can be heavily written to in code that
> exercises the fp8 functionality. That is because every fp8 instrinsic call
> can potentially change the value of fpmr.
> Rather than just use a an unspec, we treat the fpmr system register like

Typo: s/a an/an/

> all other registers and use a move operation to read and write to it.
>
> We introduce a new class of moveable system registers that, currently,
> only accepts fpmr and a new constraint, Umv, that allows us to
> selectively use mrs and msr instructions when expanding rtl for them.
> Given that there is code that depends on "real" registers coming before
> "fake" ones, we introduce a new constant FPM_REGNUM that uses an
> existing value and renumber registers below that.
> This requires us to update the bitmaps that describe which registers
> belong to each register class.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add
>       support for MOVEABLE_SYSREGS class.
>       (aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr.
>       (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class.
>       (aarch64_class_max_nregs): Likewise.
>       * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr.
>       (CALL_REALLY_USED_REGISTERS): Likewise.
>       (REGISTER_NAMES): Likewise.
>       (enum reg_class): Add MOVEABLE_SYSREGS class.
>       (REG_CLASS_NAMES): Likewise.
>       (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr,
>       the new MOVEABLE_REGS class and renumbering of registers.
>       * config/aarch64/aarch64.md: (FPM_REGNUM): added new register
>       number, reusing old value.
>       (FFR_REGNUM): Renumber.
>       (FFRT_REGNUM): Likewise.
>       (LOWERING_REGNUM): Likewise.
>       (TPIDR2_BLOCK_REGNUM): Likewise.
>       (SME_STATE_REGNUM): Likewise.
>       (TPIDR2_SETUP_REGNUM): Likewise.
>       (ZA_FREE_REGNUM): Likewise.
>       (ZA_SAVED_REGNUM): Likewise.
>       (ZA_REGNUM): Likewise.
>       (ZT0_REGNUM): Likewise.
>       (*mov<mode>_aarch64): Add support for moveable sysregs.
>       (*movsi_aarch64): Likewise.
>       (*movdi_aarch64): Likewise.
>       * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/acle/fp8.c: New tests.
> [...]
> @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64"
>       [w, r Z  ; neon_from_gp<q>, nosimd     ] fmov\t%s0, %w1
>       [w, w    ; neon_dup       , simd       ] dup\t%<Vetype>0, %1.<v>[0]
>       [w, w    ; neon_dup       , nosimd     ] fmov\t%s0, %s1
> +     [Umv, r  ; mrs            , *          ] msr\t%0, %x1
> +     [r, Umv  ; mrs            , *          ] mrs\t%x0, %1
>    }
>  )
>  
> @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64"
>       [r  , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
>       [w  , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
>       [w  , Ds ; neon_move, simd, 4] << 
> aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
> +     [Umv, r  ; mrs      , *   , 8] msr\t%0, %x1
> +     [r, Umv  ; mrs      , *   , 8] mrs\t%x0, %1

The lengths should be 4 rather than 8.

>    }
>    "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), 
> SImode)
>      && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
> @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64"
>       [w, w  ; fmov     , fp  , 4] fmov\t%d0, %d1
>       [w, Dd ; neon_move, simd, 4] << 
> aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
>       [w, Dx ; neon_move, simd, 8] #
> +     [Umv, r; mrs      , *   , 8] msr\t%0, %1
> +     [r, Umv; mrs      , *   , 8] mrs\t%0, %1

Similarly here.

>    }
>    "CONST_INT_P (operands[1])
>     && REG_P (operands[0])
> [...]
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c 
> b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
> index 459442be155..1a5c3d7e8fd 100644
> --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
> @@ -1,6 +1,7 @@
>  /* Test the fp8 ACLE intrinsics family.  */
>  /* { dg-do compile } */
>  /* { dg-options "-O1 -march=armv8-a" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
>  
>  #include <arm_acle.h>
>  
> @@ -17,4 +18,107 @@
>  #error "__ARM_FEATURE_FP8 feature macro defined."
>  #endif
>  
> +/*
> +**test_write_fpmr_sysreg_asm_64:
> +**   msr     fpmr, x0
> +**   ret
> +*/
> +void
> +test_write_fpmr_sysreg_asm_64 (uint64_t val)
> +{
> +  register uint64_t fpmr asm ("fpmr") = val;
> +  asm volatile ("" ::"Umv"(fpmr));
> +}
> +
> +/*
> +**test_write_fpmr_sysreg_asm_32:
> +**   uxtw    x0, w0
> +**   msr     fpmr, x0
> +**   ret
> +*/
> +void
> +test_write_fpmr_sysreg_asm_32 (uint32_t val)
> +{
> +  register uint64_t fpmr asm ("fpmr") = val;

By using uint64_t rather than uint32_t, these tests are testing movdi
rather than the smaller move patterns.  I think it should be uint32_t
instead.  We should then have just an MSR, without an extension.

Similarly for the 16-bit and 8-bit cases.

LGTM with those changes, but please give others a day or so to comment.

Thanks,
Richard


> +  asm volatile ("" ::"Umv"(fpmr));
> +}
> +
> +/*
> +**test_write_fpmr_sysreg_asm_16:
> +**   and     x0, x0, 65535
> +**   msr     fpmr, x0
> +**   ret
> +*/
> +void
> +test_write_fpmr_sysreg_asm_16 (uint16_t val)
> +{
> +  register uint64_t fpmr asm ("fpmr") = val;
> +  asm volatile ("" ::"Umv"(fpmr));
> +}
> +
> +/*
> +**test_write_fpmr_sysreg_asm_8:
> +**   and     x0, x0, 255
> +**   msr     fpmr, x0
> +**   ret
> +*/
> +void
> +test_write_fpmr_sysreg_asm_8 (uint8_t val)
> +{
> +  register uint64_t fpmr asm ("fpmr") = val;
> +  asm volatile ("" ::"Umv"(fpmr));
> +}
> +
> +/*
> +**test_read_fpmr_sysreg_asm_64:
> +**   mrs     x0, fpmr
> +**   ret
> +*/
> +uint64_t
> +test_read_fpmr_sysreg_asm_64 ()
> +{
> +  register uint64_t fpmr asm ("fpmr");
> +  asm volatile ("" : "=Umv"(fpmr) :);
> +  return fpmr;
> +}
> +
> +/*
> +**test_read_fpmr_sysreg_asm_32:
> +**   mrs     x0, fpmr
> +**   ret
> +*/
> +uint32_t
> +test_read_fpmr_sysreg_asm_32 ()
> +{
> +  register uint32_t fpmr asm ("fpmr");
> +  asm volatile ("" : "=Umv"(fpmr) :);
> +  return fpmr;
> +}
> +
> +/*
> +**test_read_fpmr_sysreg_asm_16:
> +**   mrs     x0, fpmr
> +**   ret
> +*/
> +uint16_t
> +test_read_fpmr_sysreg_asm_16 ()
> +{
> +  register uint16_t fpmr asm ("fpmr");
> +  asm volatile ("" : "=Umv"(fpmr) :);
> +  return fpmr;
> +}
> +
> +/*
> +**test_read_fpmr_sysreg_asm_8:
> +**   mrs     x0, fpmr
> +**   ret
> +*/
> +uint8_t
> +test_read_fpmr_sysreg_asm_8 ()
> +{
> +  register uint8_t fpmr asm ("fpmr");
> +  asm volatile ("" : "=Umv"(fpmr) :);
> +  return fpmr;
> +}
> +
>  #pragma GCC pop_options

Reply via email to