https://gcc.gnu.org/g:cfa827188dc236ba905b12ef06ccc517b9f2de39

commit r16-2204-gcfa827188dc236ba905b12ef06ccc517b9f2de39
Author: Kyrylo Tkachov <ktkac...@nvidia.com>
Date:   Thu Jul 3 10:09:47 2025 -0700

    aarch64: Use EOR3 for DImode values
    
    Similar to BCAX, we can use EOR3 for DImode, but we have to be careful
    not to force GP<->SIMD moves unnecessarily, so add a splitter for that case.
    
    So for input:
    uint64_t eor3_d_gp (uint64_t a, uint64_t b, uint64_t c) { return EOR3 (a, b, c); }
    uint64x1_t eor3_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return EOR3 (a, b, c); }
    
    We generate the desired:
    eor3_d_gp:
            eor     x1, x1, x2
            eor     x0, x1, x0
            ret
    
    eor3_d:
            eor3    v0.16b, v0.16b, v1.16b, v2.16b
            ret
    
    Bootstrapped and tested on aarch64-none-linux-gnu.
    
    Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
    
    gcc/
    
            * config/aarch64/aarch64-simd.md (*eor3qdi4): New
            define_insn_and_split.
    
    gcc/testsuite/
    
            * gcc.target/aarch64/simd/eor3_d.c: Add tests for DImode operands.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md             | 25 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c |  6 +++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 270cb2ff3a12..75192ddfd944 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9191,6 +9191,31 @@
   [(set_attr "type" "crypto_sha3")]
 )
 
+(define_insn_and_split "*eor3qdi4"
+  [(set (match_operand:DI 0 "register_operand")
+       (xor:DI
+        (xor:DI
+         (match_operand:DI 2 "register_operand")
+         (match_operand:DI 3 "register_operand"))
+        (match_operand:DI 1 "register_operand")))]
+  "TARGET_SHA3"
+  {@ [ cons: =0, 1, 2 , 3  ; attrs: type ]
+     [ w       , w, w , w  ; crypto_sha3 ] eor3\t%0.16b, %1.16b, %2.16b, %3.16b
+     [ &r      , r, r0, r0 ; multiple    ] #
+  }
+  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+  [(set (match_dup 4) (xor:DI (match_dup 2) (match_dup 3)))
+   (set (match_dup 0) (xor:DI (match_dup 4) (match_dup 1)))]
+  {
+    if (reload_completed)
+      operands[4] = operands[0];
+    else if (can_create_pseudo_p ())
+      operands[4] = gen_reg_rtx (DImode);
+    else
+      FAIL;
+  }
+)
+
 (define_insn "aarch64_rax1qv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=w")
        (xor:V2DI
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c b/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c
index 7f2b2b422685..6c9595b6acf2 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c
@@ -7,9 +7,13 @@
 
 #define EOR3(x,y,z)  ((x) ^ (y) ^ (z))
 
+/* Should not use EOR3 when inputs come from GP regs.  */
+uint64_t eor3_d_gp (uint64_t a, uint64_t b, uint64_t c) { return EOR3 (a, b, c); }
+
+uint64x1_t eor3_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return EOR3 (a, b, c); }
 uint32x2_t bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c) { return EOR3 (a, b, c); }
 uint16x4_t bcax_h (uint16x4_t a, uint16x4_t b, uint16x4_t c) { return EOR3 (a, b, c); }
 uint8x8_t bcax_b (uint8x8_t a, uint8x8_t b, uint8x8_t c) { return EOR3 (a, b, c); }
 
-/* { dg-final { scan-assembler-times {eor3\tv0.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 3 } } */
+/* { dg-final { scan-assembler-times {eor3\tv0.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 4 } } */

Reply via email to