This patch extends the SMULH and UMULH support to unpacked vectors.
The type suffix must be taken from the element size rather than the
container size.

The main use of these patterns is to support division and modulus
by a constant.  The conditional forms would be hard to trigger from
non-ACLE code, and ACLE code needs fully-packed vectors only.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
        * config/aarch64/aarch64-sve.md (<su>mul<mode>3_highpart)
        (@aarch64_pred_<MUL_HIGHPART:optab><mode>): Extend from SVE_FULL_I
        to SVE_I.

gcc/testsuite/
        * gcc.target/aarch64/sve/mul_highpart_3.c: New test.
---
 gcc/config/aarch64/aarch64-sve.md             | 20 +++++------
 .../gcc.target/aarch64/sve/mul_highpart_3.c   | 34 +++++++++++++++++++
 2 files changed, 44 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 8083749a07e..2e6128e6032 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4192,12 +4192,12 @@ (define_insn "@aarch64_sve_<optab><mode>"
 
 ;; Unpredicated highpart multiplication.
 (define_expand "<su>mul<mode>3_highpart"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+       (unspec:SVE_I
          [(match_dup 3)
-          (unspec:SVE_FULL_I
-            [(match_operand:SVE_FULL_I 1 "register_operand")
-             (match_operand:SVE_FULL_I 2 "register_operand")]
+          (unspec:SVE_I
+            [(match_operand:SVE_I 1 "register_operand")
+             (match_operand:SVE_I 2 "register_operand")]
             MUL_HIGHPART)]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
@@ -4208,12 +4208,12 @@ (define_expand "<su>mul<mode>3_highpart"
 
 ;; Predicated highpart multiplication.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-          (unspec:SVE_FULL_I
-            [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
-             (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
+          (unspec:SVE_I
+            [(match_operand:SVE_I 2 "register_operand" "%0, w")
+             (match_operand:SVE_I 3 "register_operand" "w, w")]
             MUL_HIGHPART)]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c
new file mode 100644
index 00000000000..3aa6575e4ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_3.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP(TYPE) \
+  TYPE test##_##TYPE##_reg (TYPE a, TYPE b) { return a % 17; }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (int8_t, 32)
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (int8_t, 64)
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (int16_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (int8_t, 128)
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (int16_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (int32_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */

Reply via email to