Hi Claudio,

I've left a couple of small comments below.

On 22/07/2024 09:30, Claudio Bantaloukas wrote:
> 
> Unlike most system registers, fpmr can be heavily written to in code that
> exercises the fp8 functionality. That is because every fp8 instrinsic call
> can potentially change the value of fpmr.
> Rather than just use a an unspec, we treat the fpmr system register like
> all other registers and use a move operation to read and write to it.
> 
> We introduce a new class of moveable system registers that, currently,
> only accepts fpmr and a new constraint, Umv, that allows us to
> selectively use mrs and msr instructions when expanding rtl for them.
> Given that there is code that depends on "real" registers coming before
> "fake" ones, we introduce a new constant FPM_REGNUM that uses an
> existing value and renumber registers below that.
> This requires us to update the bitmaps that describe which registers
> belong to each register class.
> 
> gcc/ChangeLog:
> 
>       * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add
>       support for MOVEABLE_SYSREGS class.
>       (aarch64_hard_regno_mode_ok): Only allow 64 bit reads and writes
>       to fpmr.
>       (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class.
>       (aarch64_class_max_nregs): Likewise.
>       * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr.
>       (CALL_REALLY_USED_REGISTERS): Likewise.
>       (REGISTER_NAMES): Likewise.
>       (enum reg_class): Add MOVEABLE_SYSREGS class.
>       (REG_CLASS_NAMES): Likewise.
>       (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr,
>       the new MOVEABLE_REGS class and renumbering of registers.
>       * config/aarch64/aarch64.md: (FPM_REGNUM): added new register
>       number, reusing old value.
>       (FFR_REGNUM): Renumber.
>       (FFRT_REGNUM): Likewise.
>       (LOWERING_REGNUM): Likewise.
>       (TPIDR2_BLOCK_REGNUM): Likewise.
>       (SME_STATE_REGNUM): Likewise.
>       (TPIDR2_SETUP_REGNUM): Likewise.
>       (ZA_FREE_REGNUM): Likewise.
>       (ZA_SAVED_REGNUM): Likewise.
>       (ZA_REGNUM): Likewise.
>       (ZT0_REGNUM): Likewise.
>       (*mov<mode>_aarch64): Add support for moveable sysregs.
>       (*movsi_aarch64): Likewise.
>       (*movdi_aarch64): Likewise.
>       * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/aarch64/acle/fp8.c: New tests.
> ---
>  gcc/config/aarch64/aarch64.cc               |   9 ++
>  gcc/config/aarch64/aarch64.h                |  14 ++-
>  gcc/config/aarch64/aarch64.md               |  30 ++++--
>  gcc/config/aarch64/constraints.md           |   3 +
>  gcc/testsuite/gcc.target/aarch64/acle/fp8.c | 107 +++++++++++++++++++-
>  5 files changed, 146 insertions(+), 17 deletions(-)
> 

> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 0d9e80d85b2..fa526836c6a 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -2018,6 +2018,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode 
> mode)
>      case PR_HI_REGS:
>        return mode == VNx32BImode ? 2 : 1;
>  
> +    case MOVEABLE_SYSREGS:
>      case FFR_REGS:
>      case PR_AND_FFR_REGS:
>      case FAKE_REGS:
> @@ -2045,6 +2046,10 @@ aarch64_hard_regno_mode_ok (unsigned regno, 
> machine_mode mode)
>      /* This must have the same size as _Unwind_Word.  */
>      return mode == DImode;
>  
> +  if (regno == FPM_REGNUM)
> +    /* This must have the same size as the FPMR register.  */
> +    return mode == QImode || mode == HImode || mode == SImode || mode == 
> DImode;

I'm probably missing something here, but I can't seem to square the
comment with the logic.  These modes all have different sizes, so how
can they all be the same size as the FPMR register?

> +
>    unsigned int vec_flags = aarch64_classify_vector_mode (mode);
>    if (vec_flags == VEC_SVE_PRED)
>      return pr_or_ffr_regnum_p (regno);
> @@ -12682,6 +12687,9 @@ aarch64_regno_regclass (unsigned regno)
>    if (PR_REGNUM_P (regno))
>      return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
>  
> +  if (regno == FPM_REGNUM)
> +    return MOVEABLE_SYSREGS;
> +
>    if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
>      return FFR_REGS;
>  
> @@ -13070,6 +13078,7 @@ aarch64_class_max_nregs (reg_class_t regclass, 
> machine_mode mode)
>      case PR_HI_REGS:
>        return mode == VNx32BImode ? 2 : 1;
>  
> +    case MOVEABLE_SYSREGS:
>      case STACK_REG:
>      case FFR_REGS:
>      case PR_AND_FFR_REGS:
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 40793aab814..7385bdaa456 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -522,6 +522,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE 
> ATTRIBUTE_UNUSED
>      1, 1, 1, 1,                      /* SFP, AP, CC, VG */   \
>      0, 0, 0, 0,   0, 0, 0, 0,   /* P0 - P7 */           \
>      0, 0, 0, 0,   0, 0, 0, 0,   /* P8 - P15 */          \
> +    1,                               /* FPMR */              \
>      1, 1,                    /* FFR and FFRT */      \
>      1, 1, 1, 1, 1, 1, 1, 1   /* Fake registers */    \
>    }
> @@ -546,6 +547,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE 
> ATTRIBUTE_UNUSED
>      1, 1, 1, 0,                      /* SFP, AP, CC, VG */   \
>      1, 1, 1, 1,   1, 1, 1, 1,        /* P0 - P7 */           \
>      1, 1, 1, 1,   1, 1, 1, 1,        /* P8 - P15 */          \
> +    1,                               /* FPMR */              \
>      1, 1,                    /* FFR and FFRT */      \
>      0, 0, 0, 0, 0, 0, 0, 0   /* Fake registers */    \
>    }
> @@ -563,6 +565,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE 
> ATTRIBUTE_UNUSED
>      "sfp", "ap",  "cc",  "vg",                                       \
>      "p0",  "p1",  "p2",  "p3",  "p4",  "p5",  "p6",  "p7",   \
>      "p8",  "p9",  "p10", "p11", "p12", "p13", "p14", "p15",  \
> +    "fpmr",                                                  \
>      "ffr", "ffrt",                                           \
>      "lowering", "tpidr2_block", "sme_state", "tpidr2_setup", \
>      "za_free", "za_saved", "za", "zt0"                               \
> @@ -774,6 +777,7 @@ enum reg_class
>    PR_REGS,
>    FFR_REGS,
>    PR_AND_FFR_REGS,
> +  MOVEABLE_SYSREGS,
>    FAKE_REGS,
>    ALL_REGS,
>    LIM_REG_CLASSES            /* Last */
> @@ -800,6 +804,7 @@ enum reg_class
>    "PR_REGS",                                 \
>    "FFR_REGS",                                        \
>    "PR_AND_FFR_REGS",                         \
> +  "MOVEABLE_SYSREGS",                                \
>    "FAKE_REGS",                                       \
>    "ALL_REGS"                                 \
>  }
> @@ -821,10 +826,11 @@ enum reg_class
>    { 0x00000000, 0x00000000, 0x00000ff0 },    /* PR_LO_REGS */        \
>    { 0x00000000, 0x00000000, 0x000ff000 },    /* PR_HI_REGS */        \
>    { 0x00000000, 0x00000000, 0x000ffff0 },    /* PR_REGS */           \
> -  { 0x00000000, 0x00000000, 0x00300000 },    /* FFR_REGS */          \
> -  { 0x00000000, 0x00000000, 0x003ffff0 },    /* PR_AND_FFR_REGS */   \
> -  { 0x00000000, 0x00000000, 0x3fc00000 },    /* FAKE_REGS */         \
> -  { 0xffffffff, 0xffffffff, 0x000fffff }     /* ALL_REGS */          \
> +  { 0x00000000, 0x00000000, 0x00600000 },    /* FFR_REGS */          \
> +  { 0x00000000, 0x00000000, 0x006ffff0 },    /* PR_AND_FFR_REGS */   \
> +  { 0x00000000, 0x00000000, 0x00100000 },    /* MOVEABLE_SYSREGS */  \
> +  { 0x00000000, 0x00000000, 0x7f800000 },    /* FAKE_REGS */         \
> +  { 0xffffffff, 0xffffffff, 0x001fffff }     /* ALL_REGS */          \
>  }
>  
>  #define REGNO_REG_CLASS(REGNO)       aarch64_regno_regclass (REGNO)
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 94ff0eefa77..b654c0b9bb8 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -107,10 +107,14 @@ (define_constants
>      (P14_REGNUM              82)
>      (P15_REGNUM              83)
>      (LAST_SAVED_REGNUM       83)
> -    (FFR_REGNUM              84)
> +
> +    ;; Floating Point Mode Register, used in FP8 insns.
> +    (FPM_REGNUM              84)
> +
> +    (FFR_REGNUM              85)
>      ;; "FFR token": a fake register used for representing the scheduling
>      ;; restrictions on FFR-related operations.
> -    (FFRT_REGNUM     85)
> +    (FFRT_REGNUM     86)
>  
>      ;; ----------------------------------------------------------------
>      ;; Fake registers
> @@ -122,17 +126,17 @@ (define_constants
>      ;; ABI-related lowering is needed.  These placeholders read and
>      ;; write this register.  Instructions that depend on the lowering
>      ;; read the register.
> -    (LOWERING_REGNUM 86)
> +    (LOWERING_REGNUM 87)
>  
>      ;; Represents the contents of the current function's TPIDR2 block,
>      ;; in abstract form.
> -    (TPIDR2_BLOCK_REGNUM 87)
> +    (TPIDR2_BLOCK_REGNUM 88)
>  
>      ;; Holds the value that the current function wants PSTATE.ZA to be.
>      ;; The actual value can sometimes vary, because it does not track
>      ;; changes to PSTATE.ZA that happen during a lazy save and restore.
>      ;; Those effects are instead tracked by ZA_SAVED_REGNUM.
> -    (SME_STATE_REGNUM 88)
> +    (SME_STATE_REGNUM 89)
>  
>      ;; Instructions write to this register if they set TPIDR2_EL0 to a
>      ;; well-defined value.  Instructions read from the register if they
> @@ -140,14 +144,14 @@ (define_constants
>      ;;
>      ;; The register does not model the architected TPIDR2_ELO, just the
>      ;; current function's management of it.
> -    (TPIDR2_SETUP_REGNUM 89)
> +    (TPIDR2_SETUP_REGNUM 90)
>  
>      ;; Represents the property "has an incoming lazy save been committed?".
> -    (ZA_FREE_REGNUM 90)
> +    (ZA_FREE_REGNUM 91)
>  
>      ;; Represents the property "are the current function's ZA contents
>      ;; stored in the lazy save buffer, rather than in ZA itself?".
> -    (ZA_SAVED_REGNUM 91)
> +    (ZA_SAVED_REGNUM 92)
>  
>      ;; Represents the contents of the current function's ZA state in
>      ;; abstract form.  At various times in the function, these contents
> @@ -155,10 +159,10 @@ (define_constants
>      ;;
>      ;; The contents persist even when the architected ZA is off.  Private-ZA
>      ;; functions have no effect on its contents.
> -    (ZA_REGNUM 92)
> +    (ZA_REGNUM 93)
>  
>      ;; Similarly represents the contents of the current function's ZT0 state.
> -    (ZT0_REGNUM 93)
> +    (ZT0_REGNUM 94)
>  
>      (FIRST_FAKE_REGNUM       LOWERING_REGNUM)
>      (LAST_FAKE_REGNUM        ZT0_REGNUM)
> @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64"
>       [w, r Z  ; neon_from_gp<q>, nosimd     ] fmov\t%s0, %w1
>       [w, w    ; neon_dup       , simd       ] dup\t%<Vetype>0, %1.<v>[0]
>       [w, w    ; neon_dup       , nosimd     ] fmov\t%s0, %s1
> +     [Umv, r  ; mrs            , *          ] msr\t%0, %x1
> +     [r, Umv  ; mrs            , *          ] mrs\t%x0, %1
>    }
>  )
>  
> @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64"
>       [r  , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
>       [w  , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
>       [w  , Ds ; neon_move, simd, 4] << 
> aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
> +     [Umv, r  ; mrs      , *   , 8] msr\t%0, %x1
> +     [r, Umv  ; mrs      , *   , 8] mrs\t%x0, %1
>    }
>    "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), 
> SImode)
>      && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
> @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64"
>       [w, w  ; fmov     , fp  , 4] fmov\t%d0, %d1
>       [w, Dd ; neon_move, simd, 4] << 
> aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
>       [w, Dx ; neon_move, simd, 8] #
> +     [Umv, r; mrs      , *   , 8] msr\t%0, %x1
> +     [r, Umv; mrs      , *   , 8] mrs\t%x0, %1

I think in the case of this pattern (but not the others) the %x modifier
isn't needed since a DImode operand satisfying "r" should get printed as
an x register by default.

Thanks,
Alex

>    }
>    "CONST_INT_P (operands[1])
>     && REG_P (operands[0])
> diff --git a/gcc/config/aarch64/constraints.md 
> b/gcc/config/aarch64/constraints.md
> index a2569cea510..0c81fb28f7e 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -77,6 +77,9 @@ (define_register_constraint "Upl" "PR_LO_REGS"
>  (define_register_constraint "Uph" "PR_HI_REGS"
>    "SVE predicate registers p8 - p15.")
>  
> +(define_register_constraint "Umv" "MOVEABLE_SYSREGS"
> +  "@internal System Registers suitable for moving rather than requiring an 
> unspec msr")
> +
>  (define_constraint "c"
>   "@internal The condition code register."
>    (match_operand 0 "cc_register"))
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c 
> b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
> index b774f28c9f0..10fd128d8f9 100644
> --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
> @@ -1,14 +1,14 @@
>  /* Test the fp8 ACLE intrinsics family.  */
>  /* { dg-do compile } */
>  /* { dg-options "-O1 -march=armv8-a" } */
> -/* { dg-final { check-function-bodies "**" "" } } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
>  
>  #ifdef __ARM_FEATURE_FP8
>  #error "__ARM_FEATURE_FP8 feature macro defined."
>  #endif
>  
>  #pragma GCC push_options
> -#pragma GCC target ("arch=armv9.4-a+fp8")
> +#pragma GCC target("arch=armv9.4-a+fp8")
>  
>  #include <arm_acle.h>
>  
> @@ -16,4 +16,105 @@
>  #error "__ARM_FEATURE_FP8 feature macro not defined."
>  #endif
>  
> -#pragma GCC pop_options
> +/*
> +**test_write_fpmr_sysreg_asm_64:
> +**   msr     fpmr, x0
> +**   ret
> +*/
> +void
> +test_write_fpmr_sysreg_asm_64 (uint64_t val)
> +{
> +  register uint64_t fpmr asm ("fpmr") = val;
> +  asm volatile ("" ::"Umv"(fpmr));
> +}
> +
> +/*
> +**test_write_fpmr_sysreg_asm_32:
> +**   uxtw    x0, w0
> +**   msr     fpmr, x0
> +**   ret
> +*/
> +void
> +test_write_fpmr_sysreg_asm_32 (uint32_t val)
> +{
> +  register uint64_t fpmr asm ("fpmr") = val;
> +  asm volatile ("" ::"Umv"(fpmr));
> +}
> +
> +/*
> +**test_write_fpmr_sysreg_asm_16:
> +**   and     x0, x0, 65535
> +**   msr     fpmr, x0
> +**   ret
> +*/
> +void
> +test_write_fpmr_sysreg_asm_16 (uint16_t val)
> +{
> +  register uint64_t fpmr asm ("fpmr") = val;
> +  asm volatile ("" ::"Umv"(fpmr));
> +}
> +
> +/*
> +**test_write_fpmr_sysreg_asm_8:
> +**   and     x0, x0, 255
> +**   msr     fpmr, x0
> +**   ret
> +*/
> +void
> +test_write_fpmr_sysreg_asm_8 (uint8_t val)
> +{
> +  register uint64_t fpmr asm ("fpmr") = val;
> +  asm volatile ("" ::"Umv"(fpmr));
> +}
> +
> +/*
> +**test_read_fpmr_sysreg_asm_64:
> +**   mrs     x0, fpmr
> +**   ret
> +*/
> +uint64_t
> +test_read_fpmr_sysreg_asm_64 ()
> +{
> +  register uint64_t fpmr asm ("fpmr");
> +  asm volatile ("" : "=Umv"(fpmr) :);
> +  return fpmr;
> +}
> +
> +/*
> +**test_read_fpmr_sysreg_asm_32:
> +**   mrs     x0, fpmr
> +**   ret
> +*/
> +uint32_t
> +test_read_fpmr_sysreg_asm_32 ()
> +{
> +  register uint32_t fpmr asm ("fpmr");
> +  asm volatile ("" : "=Umv"(fpmr) :);
> +  return fpmr;
> +}
> +
> +/*
> +**test_read_fpmr_sysreg_asm_16:
> +**   mrs     x0, fpmr
> +**   ret
> +*/
> +uint16_t
> +test_read_fpmr_sysreg_asm_16 ()
> +{
> +  register uint16_t fpmr asm ("fpmr");
> +  asm volatile ("" : "=Umv"(fpmr) :);
> +  return fpmr;
> +}
> +
> +/*
> +**test_read_fpmr_sysreg_asm_8:
> +**   mrs     x0, fpmr
> +**   ret
> +*/
> +uint8_t
> +test_read_fpmr_sysreg_asm_8 ()
> +{
> +  register uint8_t fpmr asm ("fpmr");
> +  asm volatile ("" : "=Umv"(fpmr) :);
> +  return fpmr;
> +}

Reply via email to