Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

This patch adds smax/smin support for the partial vector modes V2BF and V4BF.

gcc/ChangeLog:

        * config/i386/mmx.md (<code><mode>3): New define_expand for
        V2BF/V4BF smaxmin.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c: New test.
---
 gcc/config/i386/mmx.md                        | 19 ++++++++++
 .../avx10_2-partial-bf-vector-smaxmin-1.c     | 36 +++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9116ddb5321..3f12a1349ab 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2098,6 +2098,25 @@
   DONE;
 })
 
+(define_expand "<code><mode>3"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+    (smaxmin:VBF_32_64
+      (match_operand:VBF_32_64 1 "nonimmediate_operand")
+      (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
+  "TARGET_AVX10_2_256"
+{
+  rtx op0 = gen_reg_rtx (V8BFmode);
+  rtx op1 = lowpart_subreg (V8BFmode,
+                           force_reg (<MODE>mode, operands[1]), <MODE>mode);
+  rtx op2 = lowpart_subreg (V8BFmode,
+                           force_reg (<MODE>mode, operands[2]), <MODE>mode);
+
+  emit_insn (gen_<code>v8bf3 (op0, op1, op2));
+
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+  DONE;
+})
+
 (define_expand "sqrt<mode>2"
   [(set (match_operand:VHF_32_64 0 "register_operand")
        (sqrt:VHF_32_64
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
new file mode 100644
index 00000000000..0a7cc58e29d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx10.2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
+
+void
+maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
-- 
2.31.1

Reply via email to