https://gcc.gnu.org/g:cfa827188dc236ba905b12ef06ccc517b9f2de39
commit r16-2204-gcfa827188dc236ba905b12ef06ccc517b9f2de39
Author: Kyrylo Tkachov <ktkac...@nvidia.com>
Date:   Thu Jul 3 10:09:47 2025 -0700

    aarch64: Use EOR3 for DImode values

    Similar to BCAX, we can use EOR3 for DImode, but we have to be careful
    not to force GP<->SIMD moves unnecessarily, so add a splitter for that case.

    So for input:
    uint64_t eor3_d_gp (uint64_t a, uint64_t b, uint64_t c) { return EOR3 (a, b, c); }
    uint64x1_t eor3_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return EOR3 (a, b, c); }

    We generate the desired:
    eor3_d_gp:
            eor     x1, x1, x2
            eor     x0, x1, x0
            ret

    eor3_d:
            eor3    v0.16b, v0.16b, v1.16b, v2.16b
            ret

    Bootstrapped and tested on aarch64-none-linux-gnu.

    Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>

    gcc/

            * config/aarch64/aarch64-simd.md (*eor3qdi4): New define_insn_and_split.

    gcc/testsuite/

            * gcc.target/aarch64/simd/eor3_d.c: Add tests for DImode operands.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md             | 25 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c |  6 +++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 270cb2ff3a12..75192ddfd944 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9191,6 +9191,31 @@
   [(set_attr "type" "crypto_sha3")]
 )
 
+(define_insn_and_split "*eor3qdi4"
+  [(set (match_operand:DI 0 "register_operand")
+        (xor:DI
+         (xor:DI
+          (match_operand:DI 2 "register_operand")
+          (match_operand:DI 3 "register_operand"))
+         (match_operand:DI 1 "register_operand")))]
+  "TARGET_SHA3"
+  {@ [ cons: =0, 1, 2 , 3  ; attrs: type ]
+     [ w     , w, w , w  ; crypto_sha3 ] eor3\t%0.16b, %1.16b, %2.16b, %3.16b
+     [ &r    , r, r0, r0 ; multiple    ] #
+  }
+  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+  [(set (match_dup 4) (xor:DI (match_dup 2) (match_dup 3)))
+   (set (match_dup 0) (xor:DI (match_dup 4) (match_dup 1)))]
+  {
+    if (reload_completed)
+      operands[4] = operands[0];
+    else if (can_create_pseudo_p ())
+      operands[4] = gen_reg_rtx (DImode);
+    else
+      FAIL;
+  }
+)
+
 (define_insn "aarch64_rax1qv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=w")
         (xor:V2DI
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c b/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c
index 7f2b2b422685..6c9595b6acf2 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c
@@ -7,9 +7,13 @@
 
 #define EOR3(x,y,z) ((x) ^ (y) ^ (z))
 
+/* Should not use EOR3 when inputs come from GP regs. */
+uint64_t eor3_d_gp (uint64_t a, uint64_t b, uint64_t c) { return EOR3 (a, b, c); }
+
+uint64x1_t eor3_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return EOR3 (a, b, c); }
 uint32x2_t bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c) { return EOR3 (a, b, c); }
 uint16x4_t bcax_h (uint16x4_t a, uint16x4_t b, uint16x4_t c) { return EOR3 (a, b, c); }
 uint8x8_t bcax_b (uint8x8_t a, uint8x8_t b, uint8x8_t c) { return EOR3 (a, b, c); }
 
-/* { dg-final { scan-assembler-times {eor3\tv0.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 3 } } */
+/* { dg-final { scan-assembler-times {eor3\tv0.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 4 } } */
 