On 31/07/2024 08:57, Kyrylo Tkachov wrote:
> Hi Claudio,
> 
>> On 31 Jul 2024, at 08:29, Claudio Bantaloukas <claudio.bantalou...@arm.com> 
>> wrote:
>>
>> External email: Use caution opening links or attachments
>>
>>
>> Unlike most system registers, fpmr can be heavily written to in code that
>> exercises the fp8 functionality. That is because every fp8 instrinsic call
>> can potentially change the value of fpmr.
>> Rather than just use an unspec, we treat the fpmr system register like
>> all other registers and use a move operation to read and write to it.
>>
>> We introduce a new class of moveable system registers that, currently,
>> only accepts fpmr and a new constraint, Umv, that allows us to
>> selectively use mrs and msr instructions when expanding rtl for them.
>> Given that there is code that depends on "real" registers coming before
>> "fake" ones, we introduce a new constant FPM_REGNUM that uses an
>> existing value and renumber registers below that.
>> This requires us to update the bitmaps that describe which registers
>> belong to each register class.
> 
> So I like the approach though I’ll let Richard review the implementation 
> details here.
> My only slight concern here is compatibility with LLVM. I notice that LLVM 
> doesn’t accept the test case you’ve included as it doesn’t understand “fpmr” 
> in its inline assembly. It also doesn’t support the new constraint, of course.
> Do you know if there are plans to teach LLVM these inline assembly constructs 
> to avoid creating GCC-only sources for fp8?

Hi Kyrill,
I asked and got confirmation that a patch to add fpmr as a register 
should land soon.

Cheers,
Claudio

> Thanks,
> Kyrill
> 
> 
>>
>> gcc/ChangeLog:
>>
>>         * config/aarch64/aarch64.cc (aarch64_hard_regno_nregs): Add
>>         support for MOVEABLE_SYSREGS class.
>>         (aarch64_hard_regno_mode_ok): Allow reads and writes to fpmr.
>>         (aarch64_regno_regclass): Support MOVEABLE_SYSREGS class.
>>         (aarch64_class_max_nregs): Likewise.
>>         * config/aarch64/aarch64.h (FIXED_REGISTERS): add fpmr.
>>         (CALL_REALLY_USED_REGISTERS): Likewise.
>>         (REGISTER_NAMES): Likewise.
>>         (enum reg_class): Add MOVEABLE_SYSREGS class.
>>         (REG_CLASS_NAMES): Likewise.
>>         (REG_CLASS_CONTENTS): Update class bitmaps to deal with fpmr,
>>         the new MOVEABLE_REGS class and renumbering of registers.
>>         * config/aarch64/aarch64.md: (FPM_REGNUM): added new register
>>         number, reusing old value.
>>         (FFR_REGNUM): Renumber.
>>         (FFRT_REGNUM): Likewise.
>>         (LOWERING_REGNUM): Likewise.
>>         (TPIDR2_BLOCK_REGNUM): Likewise.
>>         (SME_STATE_REGNUM): Likewise.
>>         (TPIDR2_SETUP_REGNUM): Likewise.
>>         (ZA_FREE_REGNUM): Likewise.
>>         (ZA_SAVED_REGNUM): Likewise.
>>         (ZA_REGNUM): Likewise.
>>         (ZT0_REGNUM): Likewise.
>>         (*mov<mode>_aarch64): Add support for moveable sysregs.
>>         (*movsi_aarch64): Likewise.
>>         (*movdi_aarch64): Likewise.
>>         * config/aarch64/constraints.md (MOVEABLE_SYSREGS): New constraint.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.target/aarch64/acle/fp8.c: New tests.
>> ---
>> gcc/config/aarch64/aarch64.cc               |   8 ++
>> gcc/config/aarch64/aarch64.h                |  14 ++-
>> gcc/config/aarch64/aarch64.md               |  30 ++++--
>> gcc/config/aarch64/constraints.md           |   3 +
>> gcc/testsuite/gcc.target/aarch64/acle/fp8.c | 101 ++++++++++++++++++++
>> 5 files changed, 142 insertions(+), 14 deletions(-)
>>
>> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
>> index e0cf382998c..9810f2c0390 100644
>> --- a/gcc/config/aarch64/aarch64.cc
>> +++ b/gcc/config/aarch64/aarch64.cc
>> @@ -2018,6 +2018,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode 
>> mode)
>>      case PR_HI_REGS:
>>        return mode == VNx32BImode ? 2 : 1;
>>
>> +    case MOVEABLE_SYSREGS:
>>      case FFR_REGS:
>>      case PR_AND_FFR_REGS:
>>      case FAKE_REGS:
>> @@ -2045,6 +2046,9 @@ aarch64_hard_regno_mode_ok (unsigned regno, 
>> machine_mode mode)
>>      /* This must have the same size as _Unwind_Word.  */
>>      return mode == DImode;
>>
>> +  if (regno == FPM_REGNUM)
>> +    return mode == QImode || mode == HImode || mode == SImode || mode == 
>> DImode;
>> +
>>    unsigned int vec_flags = aarch64_classify_vector_mode (mode);
>>    if (vec_flags == VEC_SVE_PRED)
>>      return pr_or_ffr_regnum_p (regno);
>> @@ -12680,6 +12684,9 @@ aarch64_regno_regclass (unsigned regno)
>>    if (PR_REGNUM_P (regno))
>>      return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
>>
>> +  if (regno == FPM_REGNUM)
>> +    return MOVEABLE_SYSREGS;
>> +
>>    if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
>>      return FFR_REGS;
>>
>> @@ -13068,6 +13075,7 @@ aarch64_class_max_nregs (reg_class_t regclass, 
>> machine_mode mode)
>>      case PR_HI_REGS:
>>        return mode == VNx32BImode ? 2 : 1;
>>
>> +    case MOVEABLE_SYSREGS:
>>      case STACK_REG:
>>      case FFR_REGS:
>>      case PR_AND_FFR_REGS:
>> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
>> index 2e75c6b81e2..2dfb999bea5 100644
>> --- a/gcc/config/aarch64/aarch64.h
>> +++ b/gcc/config/aarch64/aarch64.h
>> @@ -523,6 +523,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE 
>> ATTRIBUTE_UNUSED
>>      1, 1, 1, 1, /* SFP, AP, CC, VG */ \
>>      0, 0, 0, 0,   0, 0, 0, 0,   /* P0 - P7 */           \
>>      0, 0, 0, 0,   0, 0, 0, 0,   /* P8 - P15 */          \
>> +    1, /* FPMR */ \
>>      1, 1, /* FFR and FFRT */ \
>>      1, 1, 1, 1, 1, 1, 1, 1 /* Fake registers */ \
>>    }
>> @@ -547,6 +548,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE 
>> ATTRIBUTE_UNUSED
>>      1, 1, 1, 0, /* SFP, AP, CC, VG */ \
>>      1, 1, 1, 1,   1, 1, 1, 1, /* P0 - P7 */ \
>>      1, 1, 1, 1,   1, 1, 1, 1, /* P8 - P15 */ \
>> +    1, /* FPMR */ \
>>      1, 1, /* FFR and FFRT */ \
>>      0, 0, 0, 0, 0, 0, 0, 0 /* Fake registers */ \
>>    }
>> @@ -564,6 +566,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE 
>> ATTRIBUTE_UNUSED
>>      "sfp", "ap",  "cc",  "vg", \
>>      "p0",  "p1",  "p2",  "p3",  "p4",  "p5",  "p6",  "p7", \
>>      "p8",  "p9",  "p10", "p11", "p12", "p13", "p14", "p15", \
>> +    "fpmr", \
>>      "ffr", "ffrt", \
>>      "lowering", "tpidr2_block", "sme_state", "tpidr2_setup", \
>>      "za_free", "za_saved", "za", "zt0" \
>> @@ -775,6 +778,7 @@ enum reg_class
>>    PR_REGS,
>>    FFR_REGS,
>>    PR_AND_FFR_REGS,
>> +  MOVEABLE_SYSREGS,
>>    FAKE_REGS,
>>    ALL_REGS,
>>    LIM_REG_CLASSES /* Last */
>> @@ -801,6 +805,7 @@ enum reg_class
>>    "PR_REGS", \
>>    "FFR_REGS", \
>>    "PR_AND_FFR_REGS", \
>> +  "MOVEABLE_SYSREGS", \
>>    "FAKE_REGS", \
>>    "ALL_REGS" \
>> }
>> @@ -822,10 +827,11 @@ enum reg_class
>>    { 0x00000000, 0x00000000, 0x00000ff0 }, /* PR_LO_REGS */ \
>>    { 0x00000000, 0x00000000, 0x000ff000 }, /* PR_HI_REGS */ \
>>    { 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \
>> -  { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \
>> -  { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \
>> -  { 0x00000000, 0x00000000, 0x3fc00000 }, /* FAKE_REGS */ \
>> -  { 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \
>> +  { 0x00000000, 0x00000000, 0x00600000 }, /* FFR_REGS */ \
>> +  { 0x00000000, 0x00000000, 0x006ffff0 }, /* PR_AND_FFR_REGS */ \
>> +  { 0x00000000, 0x00000000, 0x00100000 }, /* MOVEABLE_SYSREGS */ \
>> +  { 0x00000000, 0x00000000, 0x7f800000 }, /* FAKE_REGS */ \
>> +  { 0xffffffff, 0xffffffff, 0x001fffff } /* ALL_REGS */ \
>> }
>>
>> #define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO)
>> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
>> index ed29127dafb..ed1bd2ede7d 100644
>> --- a/gcc/config/aarch64/aarch64.md
>> +++ b/gcc/config/aarch64/aarch64.md
>> @@ -107,10 +107,14 @@ (define_constants
>>      (P14_REGNUM 82)
>>      (P15_REGNUM 83)
>>      (LAST_SAVED_REGNUM 83)
>> -    (FFR_REGNUM 84)
>> +
>> +    ;; Floating Point Mode Register, used in FP8 insns.
>> +    (FPM_REGNUM 84)
>> +
>> +    (FFR_REGNUM 85)
>>      ;; "FFR token": a fake register used for representing the scheduling
>>      ;; restrictions on FFR-related operations.
>> -    (FFRT_REGNUM 85)
>> +    (FFRT_REGNUM 86)
>>
>>      ;; ----------------------------------------------------------------
>>      ;; Fake registers
>> @@ -122,17 +126,17 @@ (define_constants
>>      ;; ABI-related lowering is needed.  These placeholders read and
>>      ;; write this register.  Instructions that depend on the lowering
>>      ;; read the register.
>> -    (LOWERING_REGNUM 86)
>> +    (LOWERING_REGNUM 87)
>>
>>      ;; Represents the contents of the current function's TPIDR2 block,
>>      ;; in abstract form.
>> -    (TPIDR2_BLOCK_REGNUM 87)
>> +    (TPIDR2_BLOCK_REGNUM 88)
>>
>>      ;; Holds the value that the current function wants PSTATE.ZA to be.
>>      ;; The actual value can sometimes vary, because it does not track
>>      ;; changes to PSTATE.ZA that happen during a lazy save and restore.
>>      ;; Those effects are instead tracked by ZA_SAVED_REGNUM.
>> -    (SME_STATE_REGNUM 88)
>> +    (SME_STATE_REGNUM 89)
>>
>>      ;; Instructions write to this register if they set TPIDR2_EL0 to a
>>      ;; well-defined value.  Instructions read from the register if they
>> @@ -140,14 +144,14 @@ (define_constants
>>      ;;
>>      ;; The register does not model the architected TPIDR2_ELO, just the
>>      ;; current function's management of it.
>> -    (TPIDR2_SETUP_REGNUM 89)
>> +    (TPIDR2_SETUP_REGNUM 90)
>>
>>      ;; Represents the property "has an incoming lazy save been committed?".
>> -    (ZA_FREE_REGNUM 90)
>> +    (ZA_FREE_REGNUM 91)
>>
>>      ;; Represents the property "are the current function's ZA contents
>>      ;; stored in the lazy save buffer, rather than in ZA itself?".
>> -    (ZA_SAVED_REGNUM 91)
>> +    (ZA_SAVED_REGNUM 92)
>>
>>      ;; Represents the contents of the current function's ZA state in
>>      ;; abstract form.  At various times in the function, these contents
>> @@ -155,10 +159,10 @@ (define_constants
>>      ;;
>>      ;; The contents persist even when the architected ZA is off.  Private-ZA
>>      ;; functions have no effect on its contents.
>> -    (ZA_REGNUM 92)
>> +    (ZA_REGNUM 93)
>>
>>      ;; Similarly represents the contents of the current function's ZT0 
>> state.
>> -    (ZT0_REGNUM 93)
>> +    (ZT0_REGNUM 94)
>>
>>      (FIRST_FAKE_REGNUM LOWERING_REGNUM)
>>      (LAST_FAKE_REGNUM ZT0_REGNUM)
>> @@ -1405,6 +1409,8 @@ (define_insn "*mov<mode>_aarch64"
>>       [w, r Z  ; neon_from_gp<q>, nosimd     ] fmov\t%s0, %w1
>>       [w, w    ; neon_dup       , simd       ] dup\t%<Vetype>0, %1.<v>[0]
>>       [w, w    ; neon_dup       , nosimd     ] fmov\t%s0, %s1
>> +     [Umv, r  ; mrs            , *          ] msr\t%0, %x1
>> +     [r, Umv  ; mrs            , *          ] mrs\t%x0, %1
>>    }
>> )
>>
>> @@ -1467,6 +1473,8 @@ (define_insn_and_split "*movsi_aarch64"
>>       [r  , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
>>       [w  , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
>>       [w  , Ds ; neon_move, simd, 4] << 
>> aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
>> +     [Umv, r  ; mrs      , *   , 4] msr\t%0, %x1
>> +     [r, Umv  ; mrs      , *   , 4] mrs\t%x0, %1
>>    }
>>    "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), 
>> SImode)
>>      && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>> @@ -1505,6 +1513,8 @@ (define_insn_and_split "*movdi_aarch64"
>>       [w, w  ; fmov     , fp  , 4] fmov\t%d0, %d1
>>       [w, Dd ; neon_move, simd, 4] << 
>> aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
>>       [w, Dx ; neon_move, simd, 8] #
>> +     [Umv, r; mrs      , *   , 4] msr\t%0, %1
>> +     [r, Umv; mrs      , *   , 4] mrs\t%0, %1
>>    }
>>    "CONST_INT_P (operands[1])
>>     && REG_P (operands[0])
>> diff --git a/gcc/config/aarch64/constraints.md 
>> b/gcc/config/aarch64/constraints.md
>> index a2569cea510..0c81fb28f7e 100644
>> --- a/gcc/config/aarch64/constraints.md
>> +++ b/gcc/config/aarch64/constraints.md
>> @@ -77,6 +77,9 @@ (define_register_constraint "Upl" "PR_LO_REGS"
>> (define_register_constraint "Uph" "PR_HI_REGS"
>>    "SVE predicate registers p8 - p15.")
>>
>> +(define_register_constraint "Umv" "MOVEABLE_SYSREGS"
>> +  "@internal System Registers suitable for moving rather than requiring an 
>> unspec msr")
>> +
>> (define_constraint "c"
>>   "@internal The condition code register."
>>    (match_operand 0 "cc_register"))
>> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c 
>> b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
>> index 459442be155..afb44f83f60 100644
>> --- a/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
>> +++ b/gcc/testsuite/gcc.target/aarch64/acle/fp8.c
>> @@ -1,6 +1,7 @@
>> /* Test the fp8 ACLE intrinsics family.  */
>> /* { dg-do compile } */
>> /* { dg-options "-O1 -march=armv8-a" } */
>> +/* { dg-final { check-function-bodies "**" "" "" } } */
>>
>> #include <arm_acle.h>
>>
>> @@ -17,4 +18,104 @@
>> #error "__ARM_FEATURE_FP8 feature macro defined."
>> #endif
>>
>> +/*
>> +**test_write_fpmr_sysreg_asm_64:
>> +** msr fpmr, x0
>> +** ret
>> +*/
>> +void
>> +test_write_fpmr_sysreg_asm_64 (uint64_t val)
>> +{
>> +  register uint64_t fpmr asm ("fpmr") = val;
>> +  asm volatile ("" ::"Umv"(fpmr));
>> +}
>> +
>> +/*
>> +**test_write_fpmr_sysreg_asm_32:
>> +** msr fpmr, x0
>> +** ret
>> +*/
>> +void
>> +test_write_fpmr_sysreg_asm_32 (uint32_t val)
>> +{
>> +  register uint32_t fpmr asm ("fpmr") = val;
>> +  asm volatile ("" ::"Umv"(fpmr));
>> +}
>> +
>> +/*
>> +**test_write_fpmr_sysreg_asm_16:
>> +** msr fpmr, x0
>> +** ret
>> +*/
>> +void
>> +test_write_fpmr_sysreg_asm_16 (uint16_t val)
>> +{
>> +  register uint16_t fpmr asm ("fpmr") = val;
>> +  asm volatile ("" ::"Umv"(fpmr));
>> +}
>> +
>> +/*
>> +**test_write_fpmr_sysreg_asm_8:
>> +** msr fpmr, x0
>> +** ret
>> +*/
>> +void
>> +test_write_fpmr_sysreg_asm_8 (uint8_t val)
>> +{
>> +  register uint8_t fpmr asm ("fpmr") = val;
>> +  asm volatile ("" ::"Umv"(fpmr));
>> +}
>> +
>> +/*
>> +**test_read_fpmr_sysreg_asm_64:
>> +** mrs x0, fpmr
>> +** ret
>> +*/
>> +uint64_t
>> +test_read_fpmr_sysreg_asm_64 ()
>> +{
>> +  register uint64_t fpmr asm ("fpmr");
>> +  asm volatile ("" : "=Umv"(fpmr) :);
>> +  return fpmr;
>> +}
>> +
>> +/*
>> +**test_read_fpmr_sysreg_asm_32:
>> +** mrs x0, fpmr
>> +** ret
>> +*/
>> +uint32_t
>> +test_read_fpmr_sysreg_asm_32 ()
>> +{
>> +  register uint32_t fpmr asm ("fpmr");
>> +  asm volatile ("" : "=Umv"(fpmr) :);
>> +  return fpmr;
>> +}
>> +
>> +/*
>> +**test_read_fpmr_sysreg_asm_16:
>> +** mrs x0, fpmr
>> +** ret
>> +*/
>> +uint16_t
>> +test_read_fpmr_sysreg_asm_16 ()
>> +{
>> +  register uint16_t fpmr asm ("fpmr");
>> +  asm volatile ("" : "=Umv"(fpmr) :);
>> +  return fpmr;
>> +}
>> +
>> +/*
>> +**test_read_fpmr_sysreg_asm_8:
>> +** mrs x0, fpmr
>> +** ret
>> +*/
>> +uint8_t
>> +test_read_fpmr_sysreg_asm_8 ()
>> +{
>> +  register uint8_t fpmr asm ("fpmr");
>> +  asm volatile ("" : "=Umv"(fpmr) :);
>> +  return fpmr;
>> +}
>> +
>> #pragma GCC pop_options
> 

Reply via email to