This implements the new vector optab vec_addh_narrow<mode> for AArch64,
adding support for in-vectorizer use for early break.

Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf and x86_64-pc-linux-gnu
(-m32 and -m64) with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md (vec_addh_narrow<mode>): New.
        * config/aarch64/iterators.md (UNSPEC_ADDHN): New.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/vect-addhn_1.c: New test.

---
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..af9154bec67c21d186463baaf7321c9559c1094b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -949,6 +949,26 @@ (define_expand "vec_widen_<su>abd_lo_<mode>"
   }
 )
 
+;; Expander for the vec_addh_narrow<mode> optab.  Emits
+;; aarch64_addhn<mode>_insn on operands 0-2 together with a constant vector
+;; built from half the element bit size of <MODE>mode (presumably the shift
+;; amount that selects the high half of each sum).
+(define_expand "vec_addh_narrow<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+       (unspec:<VNARROWQ> [(plus:VQN (match_operand:VQN 1 "register_operand")
+                                     (match_operand:VQN 2 "register_operand"))]
+                          UNSPEC_ADDHN))]
+  "TARGET_SIMD"
+  {
+    rtx shft
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                               GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
+    emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1],
+                                            operands[2], shft));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_<su>abal<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b15e57843fed78ffe7edd927cfd7acbf395414a4..45c3968a5c937938b07992ab2869b5ae4db193a8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -806,6 +806,7 @@ (define_c_enum "unspec"
     UNSPEC_UHADD       ; Used in aarch64-simd.md.
     UNSPEC_SRHADD      ; Used in aarch64-simd.md.
     UNSPEC_URHADD      ; Used in aarch64-simd.md.
+    UNSPEC_ADDHN       ; Used in aarch64-simd.md.
     UNSPEC_SHSUB       ; Used in aarch64-simd.md.
     UNSPEC_UHSUB       ; Used in aarch64-simd.md.
     UNSPEC_SQDMULH     ; Used in aarch64-simd.md.
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..b6f66f979ff71d40d1e09292c1c12df3c262ce9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
@@ -0,0 +1,96 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3" } */
+
+/* For each type combination, compute (a[i] + b[i]) >> SHIFT in CAST_TYPE
+   and store it to a C_TYPE array, once in a plain loop and once in a
+   "GCC novector" loop, then abort if the two results differ.  */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "tree-vect.h"
+
+#define N 1000
+
+/* Define the arrays and the init, foo, ref-compute and validate functions
+   for one combination.  A_TYPE is the input element type, C_TYPE the
+   output element type, CAST_TYPE the type the sum is evaluated in and
+   SHIFT the right-shift amount applied to the sum.  */
+#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT) \
+  A_TYPE a_##A_TYPE##_##C_TYPE[N]; \
+  A_TYPE b_##A_TYPE##_##C_TYPE[N]; \
+  C_TYPE c_##A_TYPE##_##C_TYPE[N]; \
+  C_TYPE ref_##A_TYPE##_##C_TYPE[N]; \
+  \
+  void init_##A_TYPE##_##C_TYPE (void) { \
+    _Pragma ("GCC novector") \
+    for (int i = 0; i < N; i++) { \
+      a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3); \
+      b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7); \
+    } \
+  } \
+  \
+  void foo_##A_TYPE##_##C_TYPE (void) { \
+    for (int i = 0; i < N; i++) \
+      c_##A_TYPE##_##C_TYPE[i] = \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \
+  } \
+  \
+  void ref_##A_TYPE##_##C_TYPE##_compute (void) { \
+    _Pragma ("GCC novector") \
+    for (int i = 0; i < N; i++) \
+      ref_##A_TYPE##_##C_TYPE[i] = \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \
+  } \
+  \
+  void validate_##A_TYPE##_##C_TYPE (const char *variant_name) { \
+    _Pragma ("GCC novector") \
+    for (int i = 0; i < N; i++) { \
+      if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) { \
+        printf ("FAIL [%s]: Index %d: got %lld, expected %lld\n", \
+                variant_name, i, \
+                (long long)c_##A_TYPE##_##C_TYPE[i], \
+                (long long)ref_##A_TYPE##_##C_TYPE[i]); \
+        __builtin_abort (); \
+      } \
+    } \
+  }
+
+/* Run init, the loop under test, the reference loop and the comparison
+   for one combination.  */
+#define RUN_COMBO(A_TYPE, C_TYPE) \
+  do { \
+    init_##A_TYPE##_##C_TYPE (); \
+    foo_##A_TYPE##_##C_TYPE (); \
+    ref_##A_TYPE##_##C_TYPE##_compute (); \
+    validate_##A_TYPE##_##C_TYPE (#A_TYPE " -> " #C_TYPE); \
+  } while (0)
+
+/* Instantiate all valid combinations.  */
+TEST_COMBO(int16_t, int8_t, int32_t, 8)
+TEST_COMBO(uint16_t, uint8_t, uint32_t, 8)
+TEST_COMBO(int32_t, int16_t, int64_t, 16)
+TEST_COMBO(uint32_t, uint16_t, uint64_t, 16)
+#if defined(__aarch64__)
+TEST_COMBO(int64_t, int32_t, __int128_t, 32)
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32)
+#endif
+
+int main (void) {
+  check_vect ();
+
+  RUN_COMBO(int16_t, int8_t);
+  RUN_COMBO(uint16_t, uint8_t);
+  RUN_COMBO(int32_t, int16_t);
+  RUN_COMBO(uint32_t, uint16_t);
+#if defined(__aarch64__)
+  RUN_COMBO(int64_t, int32_t);
+  RUN_COMBO(uint64_t, uint32_t);
+#endif
+
+  return 0;
+}
+


-- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..af9154bec67c21d186463baaf7321c9559c1094b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -949,6 +949,26 @@ (define_expand "vec_widen_<su>abd_lo_<mode>"
   }
 )
 
+;; Expander for the vec_addh_narrow<mode> optab.  Emits
+;; aarch64_addhn<mode>_insn on operands 0-2 together with a constant vector
+;; built from half the element bit size of <MODE>mode (presumably the shift
+;; amount that selects the high half of each sum).
+(define_expand "vec_addh_narrow<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+	(unspec:<VNARROWQ> [(plus:VQN (match_operand:VQN 1 "register_operand")
+				      (match_operand:VQN 2 "register_operand"))]
+			   UNSPEC_ADDHN))]
+  "TARGET_SIMD"
+  {
+    rtx shft
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+				GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
+    emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1],
+					     operands[2], shft));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_<su>abal<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(plus:<VWIDE>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b15e57843fed78ffe7edd927cfd7acbf395414a4..45c3968a5c937938b07992ab2869b5ae4db193a8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -806,6 +806,7 @@ (define_c_enum "unspec"
     UNSPEC_UHADD	; Used in aarch64-simd.md.
     UNSPEC_SRHADD	; Used in aarch64-simd.md.
     UNSPEC_URHADD	; Used in aarch64-simd.md.
+    UNSPEC_ADDHN	; Used in aarch64-simd.md.
     UNSPEC_SHSUB	; Used in aarch64-simd.md.
     UNSPEC_UHSUB	; Used in aarch64-simd.md.
     UNSPEC_SQDMULH	; Used in aarch64-simd.md.
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..b6f66f979ff71d40d1e09292c1c12df3c262ce9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
@@ -0,0 +1,96 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3" } */
+
+/* For each type combination, compute (a[i] + b[i]) >> SHIFT in CAST_TYPE
+   and store it to a C_TYPE array, once in a plain loop and once in a
+   "GCC novector" loop, then abort if the two results differ.  */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "tree-vect.h"
+
+#define N 1000
+
+/* Define the arrays and the init, foo, ref-compute and validate functions
+   for one combination.  A_TYPE is the input element type, C_TYPE the
+   output element type, CAST_TYPE the type the sum is evaluated in and
+   SHIFT the right-shift amount applied to the sum.  */
+#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT) \
+  A_TYPE a_##A_TYPE##_##C_TYPE[N]; \
+  A_TYPE b_##A_TYPE##_##C_TYPE[N]; \
+  C_TYPE c_##A_TYPE##_##C_TYPE[N]; \
+  C_TYPE ref_##A_TYPE##_##C_TYPE[N]; \
+  \
+  void init_##A_TYPE##_##C_TYPE (void) { \
+    _Pragma ("GCC novector") \
+    for (int i = 0; i < N; i++) { \
+      a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3); \
+      b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7); \
+    } \
+  } \
+  \
+  void foo_##A_TYPE##_##C_TYPE (void) { \
+    for (int i = 0; i < N; i++) \
+      c_##A_TYPE##_##C_TYPE[i] = \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \
+  } \
+  \
+  void ref_##A_TYPE##_##C_TYPE##_compute (void) { \
+    _Pragma ("GCC novector") \
+    for (int i = 0; i < N; i++) \
+      ref_##A_TYPE##_##C_TYPE[i] = \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \
+  } \
+  \
+  void validate_##A_TYPE##_##C_TYPE (const char *variant_name) { \
+    _Pragma ("GCC novector") \
+    for (int i = 0; i < N; i++) { \
+      if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) { \
+        printf ("FAIL [%s]: Index %d: got %lld, expected %lld\n", \
+                variant_name, i, \
+                (long long)c_##A_TYPE##_##C_TYPE[i], \
+                (long long)ref_##A_TYPE##_##C_TYPE[i]); \
+        __builtin_abort (); \
+      } \
+    } \
+  }
+
+/* Run init, the loop under test, the reference loop and the comparison
+   for one combination.  */
+#define RUN_COMBO(A_TYPE, C_TYPE) \
+  do { \
+    init_##A_TYPE##_##C_TYPE (); \
+    foo_##A_TYPE##_##C_TYPE (); \
+    ref_##A_TYPE##_##C_TYPE##_compute (); \
+    validate_##A_TYPE##_##C_TYPE (#A_TYPE " -> " #C_TYPE); \
+  } while (0)
+
+/* Instantiate all valid combinations.  */
+TEST_COMBO(int16_t, int8_t, int32_t, 8)
+TEST_COMBO(uint16_t, uint8_t, uint32_t, 8)
+TEST_COMBO(int32_t, int16_t, int64_t, 16)
+TEST_COMBO(uint32_t, uint16_t, uint64_t, 16)
+#if defined(__aarch64__)
+TEST_COMBO(int64_t, int32_t, __int128_t, 32)
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32)
+#endif
+
+int main (void) {
+  check_vect ();
+
+  RUN_COMBO(int16_t, int8_t);
+  RUN_COMBO(uint16_t, uint8_t);
+  RUN_COMBO(int32_t, int16_t);
+  RUN_COMBO(uint32_t, uint16_t);
+#if defined(__aarch64__)
+  RUN_COMBO(int64_t, int32_t);
+  RUN_COMBO(uint64_t, uint32_t);
+#endif
+
+  return 0;
+}
+

Reply via email to