[PATCH 10/12] AVX10.2: Support vector copy instructions

Haochen Jiang Mon, 19 Aug 2024 02:03:44 -0700

From: "Zhang, Jun" <jun.zh...@intel.com>

gcc/ChangeLog:


        * config.gcc: Add avx10_2copyintrin.h.
        * config/i386/i386.md (avx10_2): New isa attribute.
        * config/i386/immintrin.h: Include avx10_2copyintrin.h.
        * config/i386/sse.md
        (sse_movss_<mode>): Add new constraints to handle AVX10.2.
        (vec_set<mode>_0): Ditto.
        (@vec_set<mode>_0): Ditto.
        (vec_set<mode>_0): Ditto.
        (avx512fp16_mov<mode>): Ditto.
        (*vec_set<mode>_0_1): New split.
        * config/i386/avx10_2copyintrin.h: New file.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/avx10_2-vmovd-1.c: New test.
        * gcc.target/i386/avx10_2-vmovd-2.c: Ditto.
        * gcc.target/i386/avx10_2-vmovw-1.c: Ditto.
        * gcc.target/i386/avx10_2-vmovw-2.c: Ditto.
---
 gcc/config.gcc                                |   3 +-
 gcc/config/i386/avx10_2copyintrin.h           |  38 +++++
 gcc/config/i386/i386.md                       |   3 +-
 gcc/config/i386/immintrin.h                   |   2 +
 gcc/config/i386/sse.md                        | 138 +++++++++++-------
 .../gcc.target/i386/avx10_2-vmovd-1.c         |  48 ++++++
 .../gcc.target/i386/avx10_2-vmovd-2.c         |  44 ++++++
 .../gcc.target/i386/avx10_2-vmovw-1.c         |  69 +++++++++
 .../gcc.target/i386/avx10_2-vmovw-2.c         |  64 ++++++++
 9 files changed, 356 insertions(+), 53 deletions(-)
 create mode 100644 gcc/config/i386/avx10_2copyintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index cd8a34b292f..e887c9c7432 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -457,7 +457,8 @@ i[34567]86-*-* | x86_64-*-*)
                       avx10_2convertintrin.h avx10_2-512convertintrin.h
                       avx10_2bf16intrin.h avx10_2-512bf16intrin.h
                       avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h
-                      avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h"
+                      avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h
+                      avx10_2copyintrin.h"
        ;;
 ia64-*-*)
        extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx10_2copyintrin.h b/gcc/config/i386/avx10_2copyintrin.h
new file mode 100644
index 00000000000..f1150c71dbf
--- /dev/null
+++ b/gcc/config/i386/avx10_2copyintrin.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of GCC.
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2COPYINTRIN_H_INCLUDED
+#define _AVX10_2COPYINTRIN_H_INCLUDED
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_epi32 (__m128i __A)
+{
+  return _mm_set_epi32 (0, 0, 0, ((__v4si) __A)[0]);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_epi16 (__m128i __A)
+{
+  return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, ((__v8hi) __A)[0]);
+}
+
+#endif /* _AVX10_2COPYINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 470ae5444db..e28f9bb5eae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -582,7 +582,7 @@
                    noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
                    avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
                    avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
-                   vaes_avx512vl,noapx_nf,apx_cfcmov"
+                   vaes_avx512vl,noapx_nf,apx_cfcmov,avx10_2"
   (const_string "base"))
 
 ;; The (bounding maximum) length of an instruction immediate.
@@ -979,6 +979,7 @@
           (symbol_ref "TARGET_APX_NDD && Pmode == DImode")
         (eq_attr "isa" "vaes_avx512vl")
           (symbol_ref "TARGET_VAES && TARGET_AVX512VL")
+        (eq_attr "isa" "avx10_2") (symbol_ref "TARGET_AVX10_2_256")
 
         (eq_attr "mmx_isa" "native")
           (symbol_ref "!TARGET_MMX_WITH_SSE")
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 0d5af155c36..6b8035e6467 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -160,4 +160,6 @@
 
 #include <avx10_2-512minmaxintrin.h>
 
+#include <avx10_2copyintrin.h>
+
 #endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 956cdba55d3..93aa6d46ae4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11505,19 +11505,20 @@
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
 (define_insn "sse_movss_<mode>"
-  [(set (match_operand:VI4F_128 0 "register_operand"   "=x,v")
+  [(set (match_operand:VI4F_128 0 "register_operand"   "=x,v,v")
        (vec_merge:VI4F_128
-         (match_operand:VI4F_128 2 "register_operand" " x,v")
-         (match_operand:VI4F_128 1 "register_operand" " 0,v")
+         (match_operand:VI4F_128 2 "register_operand" " x,v,v")
+         (match_operand:VI4F_128 1 "reg_or_0_operand" " 0,v,C")
          (const_int 1)))]
   "TARGET_SSE"
   "@
    movss\t{%2, %0|%0, %2}
-   vmovss\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
+   vmovss\t{%2, %1, %0|%0, %1, %2}
+   vmovd\t{%2, %0|%0, %2}"
+  [(set_attr "isa" "noavx,avx,avx10_2")
    (set_attr "type" "ssemov")
-   (set_attr "prefix" "orig,maybe_evex")
-   (set_attr "mode" "SF")])
+   (set_attr "prefix" "orig,maybe_evex,evex")
+   (set_attr "mode" "SF,SF,SI")])
 
 (define_insn "avx2_vec_dup<mode>"
   [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
@@ -11687,18 +11688,19 @@
 ;; see comment above inline_secondary_memory_needed function in i386.cc
 (define_insn "vec_set<mode>_0"
   [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
-         "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
+         "=Yr,*x,v,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 2 "general_operand"
-         " Yr,*x,v,m,r ,m,x,v,?jrjm,?jrjm,?rm,!x,?re,!*fF"))
+         " Yr,*x,v,v,m,r ,m,x,v,?jrjm,?jrjm,?rm,!x,?re,!*fF"))
          (match_operand:VI4F_128 1 "nonimm_or_0_operand"
-         " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
+         " C , C,C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
          (const_int 1)))]
   "TARGET_SSE"
   "@
    insertps\t{$0xe, %2, %0|%0, %2, 0xe}
    insertps\t{$0xe, %2, %0|%0, %2, 0xe}
+   vmovd\t{%2, %0|%0, %2}
    vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
    %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
    %vmovd\t{%2, %0|%0, %2}
@@ -11712,22 +11714,24 @@
    #
    #"
   [(set (attr "isa")
-     (cond [(eq_attr "alternative" "0,1,8,9")
+     (cond [(eq_attr "alternative" "0,1,9,10")
              (const_string "sse4_noavx")
-           (eq_attr "alternative" "2,7,10")
+               (eq_attr "alternative" "2")
+                 (const_string "avx10_2")
+           (eq_attr "alternative" "3,8,11")
              (const_string "avx")
-           (eq_attr "alternative" "3,4")
+           (eq_attr "alternative" "4,5")
              (const_string "sse2")
-           (eq_attr "alternative" "5,6")
+           (eq_attr "alternative" "6,7")
              (const_string "noavx")
           ]
           (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "0,1,2,8,9,10")
+     (cond [(eq_attr "alternative" "0,1,3,9,10,11")
              (const_string "sselog")
-           (eq_attr "alternative" "12")
-             (const_string "imov")
            (eq_attr "alternative" "13")
+             (const_string "imov")
+           (eq_attr "alternative" "14")
              (const_string "fmov")
           ]
           (const_string "ssemov")))
@@ -11736,45 +11740,46 @@
                   (const_string "gpr16")
                   (const_string "*")))
    (set (attr "prefix_extra")
-     (if_then_else (eq_attr "alternative" "8,9,10")
+     (if_then_else (eq_attr "alternative" "9,10,11")
                   (const_string "1")
                   (const_string "*")))
    (set (attr "length_immediate")
-     (if_then_else (eq_attr "alternative" "8,9,10")
+     (if_then_else (eq_attr "alternative" "9,10,11")
                   (const_string "1")
                   (const_string "*")))
    (set (attr "prefix")
-     (cond [(eq_attr "alternative" "0,1,5,6,8,9")
+     (cond [(eq_attr "alternative" "0,1,6,7,9,10")
              (const_string "orig")
-           (eq_attr "alternative" "2")
+           (eq_attr "alternative" "2,3")
              (const_string "maybe_evex")
-           (eq_attr "alternative" "3,4")
+           (eq_attr "alternative" "4,5")
              (const_string "maybe_vex")
-           (eq_attr "alternative" "7,10")
+           (eq_attr "alternative" "8,11")
              (const_string "vex")
           ]
           (const_string "*")))
-   (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
+   (set_attr "mode" "SF,SF,SI,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
    (set (attr "preferred_for_speed")
-     (cond [(eq_attr "alternative" "4")
+     (cond [(eq_attr "alternative" "5")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))])
 
 (define_insn "@vec_set<mode>_0"
   [(set (match_operand:V8_128 0 "register_operand"
-         "=v,v,v,x,x,Yr,*x,x,x,x,v,v")
+         "=v,v,v,v,x,x,Yr,*x,x,x,x,v,v")
        (vec_merge:V8_128
          (vec_duplicate:V8_128
            (match_operand:<ssescalarmode> 2 "nonimmediate_operand"
-         " r,m,v,r,m,Yr,*x,r,m,x,r,m"))
+         " r,m,v,v,r,m,Yr,*x,r,m,x,r,m"))
          (match_operand:V8_128 1 "reg_or_0_operand"
-         " C,C,v,0,0,0 ,0 ,x,x,x,v,v")
+         " C,C,C,v,0,0,0 ,0 ,x,x,x,v,v")
          (const_int 1)))]
   "TARGET_SSE2"
   "@
    vmovw\t{%k2, %0|%0, %k2}
    vmovw\t{%2, %0|%0, %2}
+   vmovw\t{%2, %0|%0, %2}
    vmovsh\t{%2, %1, %0|%0, %1, %2}
    pinsrw\t{$0, %k2, %0|%0, %k2, 0}
    pinsrw\t{$0, %2, %0|%0, %2, 0}
@@ -11786,65 +11791,92 @@
    vpinsrw\t{$0, %k2, %1, %0|%0, %1, %k2, 0}
    vpinsrw\t{$0, %2, %1, %0|%0, %1, %2, 0}"
   [(set (attr "isa")
-       (cond [(eq_attr "alternative" "0,1,2")
+       (cond [(eq_attr "alternative" "0,1,3")
                 (const_string "avx512fp16")
-              (eq_attr "alternative" "3,4")
+              (eq_attr "alternative" "2")
+                (const_string "avx10_2")
+              (eq_attr "alternative" "4,5")
                 (const_string "noavx")
-              (eq_attr "alternative" "5,6")
+              (eq_attr "alternative" "6,7")
                 (const_string "sse4_noavx")
-              (eq_attr "alternative" "7,8,9")
+              (eq_attr "alternative" "8,9,10")
                 (const_string "avx")
-              (eq_attr "alternative" "10,11")
+              (eq_attr "alternative" "11,12")
                 (const_string "avx512bw")
              ]
              (const_string "*")))
    (set (attr "type")
-     (if_then_else (eq_attr "alternative" "0,1,2,5,6,9")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,6,7,10")
                   (const_string "ssemov")
                   (const_string "sselog")))
    (set (attr "prefix_data16")
-     (if_then_else (eq_attr "alternative" "3,4")
+     (if_then_else (eq_attr "alternative" "4,5")
                   (const_string "1")
                   (const_string "*")))
    (set (attr "prefix_extra")
-     (if_then_else (eq_attr "alternative" "5,6,9")
+     (if_then_else (eq_attr "alternative" "6,7,10")
                   (const_string "1")
                   (const_string "*")))
    (set (attr "length_immediate")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
                   (const_string "*")
                   (const_string "1")))
    (set (attr "prefix")
-       (cond [(eq_attr "alternative" "0,1,2,10,11")
+       (cond [(eq_attr "alternative" "0,1,2,3,11,12")
                 (const_string "evex")
-              (eq_attr "alternative" "7,8,9")
+              (eq_attr "alternative" "8,9,10")
                 (const_string "vex")
              ]
              (const_string "orig")))
    (set (attr "mode")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
                   (const_string "HF")
                   (const_string "TI")))
    (set (attr "enabled")
      (cond [(and (not (match_test "<MODE>mode == V8HFmode || <MODE>mode == V8BFmode"))
-                (eq_attr "alternative" "2"))
+                (eq_attr "alternative" "3"))
              (symbol_ref "false")
           ]
           (const_string "*")))])
 
+(define_insn_and_split "*vec_set<mode>_0_1"
+  [(set (match_operand:V8_128 0 "register_operand")
+       (vec_merge:V8_128
+         (vec_duplicate:V8_128
+           (vec_select:<ssescalarmode>
+             (match_operand:V8_128 2 "nonimmediate_operand")
+             (parallel [(const_int 0)])))
+         (match_operand:V8_128 1 "reg_or_0_operand")
+         (const_int 1)))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (vec_merge:V8_128
+         (vec_duplicate:V8_128 (match_dup 2))
+         (match_dup 1)
+         (const_int 1)))]
+{
+  if (register_operand (operands[2], <MODE>mode))
+    operands[2] = force_reg (<MODE>mode, operands[2]);
+  operands[2] = gen_lowpart (<ssescalarmode>mode, operands[2]);
+})
+
 ;; vmovw clears also the higer bits
 (define_insn "vec_set<mode>_0"
-  [(set (match_operand:VI2F_256_512 0 "register_operand" "=v,v")
+  [(set (match_operand:VI2F_256_512 0 "register_operand" "=v,v,v")
        (vec_merge:VI2F_256_512
          (vec_duplicate:VI2F_256_512
-           (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m"))
+           (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,v"))
          (match_operand:VI2F_256_512 1 "const0_operand")
          (const_int 1)))]
   "TARGET_AVX512FP16"
   "@
    vmovw\t{%k2, %x0|%x0, %k2}
+   vmovw\t{%2, %x0|%x0, %2}
    vmovw\t{%2, %x0|%x0, %2}"
-  [(set_attr "type" "ssemov")
+  [(set_attr "isa" "*,*,avx10_2")
+   (set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "HF")])
 
@@ -11889,16 +11921,20 @@
 })
 
 (define_insn "avx512fp16_mov<mode>"
-  [(set (match_operand:V8_128 0 "register_operand" "=v")
+  [(set (match_operand:V8_128 0 "register_operand" "=v,v")
        (vec_merge:V8_128
-         (match_operand:V8_128 2 "register_operand" "v")
-         (match_operand:V8_128 1 "register_operand" "v")
+         (match_operand:V8_128 2 "register_operand" "v,v")
+         (match_operand:V8_128 1 "reg_or_0_operand" "v,C")
          (const_int 1)))]
-  "TARGET_AVX512FP16"
-  "vmovsh\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssemov")
+  "TARGET_AVX512FP16
+  || (TARGET_AVX10_2_256 && const0_operand (operands[1], <MODE>mode))"
+  "@
+    vmovsh\t{%2, %1, %0|%0, %1, %2}
+    vmovw\t{%2, %0|%0, %2}"
+  [(set_attr "isa" "*,avx10_2")
+   (set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
-   (set_attr "mode" "HF")])
+   (set_attr "mode" "HF,HI")])
 
 ;; A subset is vec_setv4sf.
 (define_insn "*vec_setv4sf_sse4_1"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c
new file mode 100644
index 00000000000..275bbade106
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vmovd\t4\\(%esp\\), %xmm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovss\t4\\(%esp\\), %xmm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovd\t%xmm0, %xmm0" 3 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovd\t%edi, %xmm0" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovd\t%xmm0, %xmm0" 4 { target { ! ia32 } } } } */
+
+
+#include<immintrin.h>
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef float v4sf __attribute__((vector_size(16)));
+
+v4si
+__attribute__((noipa, unused))
+f1 (int a)
+{
+  return __extension__(v4si){a, 0, 0, 0};
+}
+
+v4sf
+__attribute__((noipa, unused))
+f2 (float a)
+{
+  return __extension__(v4sf){a, 0, 0, 0};
+}
+
+v4si
+__attribute__((noipa, unused))
+f3 (v4si a)
+{
+  return __extension__(v4si){a[0], 0, 0, 0};
+}
+
+v4sf
+__attribute__((noipa, unused))
+f4 (v4sf a)
+{
+  return __extension__(v4sf){a[0], 0, 0, 0};
+}
+
+__m128i
+__attribute__((noipa, unused))
+f5 (__m128i a)
+{
+  return _mm_set_epi32 (0, 0, 0,((__v4si)a)[0]);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c
new file mode 100644
index 00000000000..7d659300d81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+
+#include "avx10-helper.h"
+#include "avx10_2-vmovd-1.c"
+
+static void
+TEST (void)
+{
+  union128i_d u1, s1;
+  int e1[4] = {0};
+
+  s1.x = _mm_set_epi32(-12876, -12886, -12776, 3376590);
+  e1[0] = s1.a[0];
+
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f1((int)s1.a[0]);
+  if (check_union128i_d (u1, e1))
+    abort ();
+
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f2(((float*)s1.a)[0]);
+  if (check_union128i_d (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f3((v4si)s1.x);
+  if (check_union128i_d (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f4((v4sf)s1.x);
+  if (check_union128i_d (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi32(-1, -1, -1, -1);
+  u1.x = (__m128i)f5((__m128i)s1.x);
+  if (check_union128i_d (u1, e1))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c
new file mode 100644
index 00000000000..ec19a9a263a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vmovw\t4\\(%esp\\), %xmm0" 3 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovw\t8\\(%ebp\\), %xmm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovw\t%xmm0, %xmm0" 4 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovw\t%edi, %xmm0" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovw\t%xmm0, %xmm0" 7 { target { ! ia32 } } } } */
+
+#include<immintrin.h>
+
+typedef _Float16 v8hf __attribute__((vector_size(16)));
+typedef __bf16 v8bf __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+
+v8hf
+__attribute__((noipa, unused))
+f1 (_Float16 a)
+{
+  return __extension__(v8hf){a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8bf
+__attribute__((noipa, unused))
+f2 (__bf16 a)
+{
+  return __extension__(v8bf){a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8hi
+__attribute__((noipa, unused))
+f3 (short a)
+{
+  return __extension__(v8hi){a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8hf
+__attribute__((noipa, unused))
+f4 (v8hf a)
+{
+  return __extension__(v8hf){a[0], 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8bf
+__attribute__((noipa, unused))
+f5 (v8bf a)
+{
+  return __extension__(v8bf){a[0], 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8hi
+__attribute__((noipa, unused))
+f6 (v8hi a)
+{
+  return __extension__(v8hi){a[0], 0, 0, 0, 0, 0, 0, 0};
+}
+
+__m128i
+__attribute__((noipa, unused))
+f7 (__m128i a)
+{
+  return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, ((__v8hi)a)[0]);
+}
+
+__m256h
+__attribute__((noipa, unused))
+f8 (_Float16 a)
+{
+  return _mm256_set_ph (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, a);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c
new file mode 100644
index 00000000000..d63739e6887
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+
+#include "avx10-helper.h"
+#include "avx10_2-vmovw-1.c"
+
+static void
+TEST (void)
+{
+  union128i_w u1, s1;
+  union256i_w u2, s2;
+  short e1[8] = {0};
+  short e2[16] = {0};
+
+  s1.x = _mm_set_epi16(-12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158);
+  e1[0] = s1.a[0];
+
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f1(((_Float16*)s1.a)[0]);
+  if (check_union128i_w (u1, e1))
+    abort ();
+
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f2(((__bf16*)s1.a)[0]);
+  if (check_union128i_w (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f3((short)s1.a[0]);
+  if (check_union128i_w (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f4((v8hf)s1.x);
+  if (check_union128i_w (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f5((v8bf)s1.x);
+  if (check_union128i_w (u1, e1))
+    abort ();
+  
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f6((v8hi)s1.x);
+  if (check_union128i_w (u1, e1))
+    abort ();
+
+  u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+  u1.x = (__m128i)f7((__m128i)s1.x);
+  if (check_union128i_w (u1, e1))
+    abort ();
+
+  s2.x = _mm256_set_epi16(-12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158,
+                          -12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158);
+  e2[0] = s2.a[0];  
+  u2.x = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  u2.x = (__m256i)f8(((_Float16*)s2.a)[0]);
+  if (check_union256i_w (u2, e2))
+    abort ();
+}
-- 
2.43.5

[PATCH 10/12] AVX10.2: Support vector copy instructions

Reply via email to