From: "Zhang, Jun" <jun.zh...@intel.com> gcc/ChangeLog:
* config.gcc: Add avx10_2copyintrin.h. * config/i386/i386.md (avx10_2): New isa attribute. * config/i386/immintrin.h: Include avx10_2copyintrin.h. * config/i386/sse.md (sse_movss_<mode>): Add new constraints to handle AVX10.2. (vec_set<mode>_0): Ditto. (@vec_set<mode>_0): Ditto. (vec_set<mode>_0): Ditto. (avx512fp16_mov<mode>): Ditto. (*vec_set<mode>_0_1): New split. * config/i386/avx10_2copyintrin.h: New file. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-vmovd-1.c: New test. * gcc.target/i386/avx10_2-vmovd-2.c: Ditto. * gcc.target/i386/avx10_2-vmovw-1.c: Ditto. * gcc.target/i386/avx10_2-vmovw-2.c: Ditto. --- gcc/config.gcc | 3 +- gcc/config/i386/avx10_2copyintrin.h | 38 +++++ gcc/config/i386/i386.md | 3 +- gcc/config/i386/immintrin.h | 2 + gcc/config/i386/sse.md | 138 +++++++++++------- .../gcc.target/i386/avx10_2-vmovd-1.c | 48 ++++++ .../gcc.target/i386/avx10_2-vmovd-2.c | 44 ++++++ .../gcc.target/i386/avx10_2-vmovw-1.c | 69 +++++++++ .../gcc.target/i386/avx10_2-vmovw-2.c | 64 ++++++++ 9 files changed, 356 insertions(+), 53 deletions(-) create mode 100644 gcc/config/i386/avx10_2copyintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c diff --git a/gcc/config.gcc b/gcc/config.gcc index cd8a34b292f..e887c9c7432 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -457,7 +457,8 @@ i[34567]86-*-* | x86_64-*-*) avx10_2convertintrin.h avx10_2-512convertintrin.h avx10_2bf16intrin.h avx10_2-512bf16intrin.h avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h - avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h" + avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h + avx10_2copyintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx10_2copyintrin.h b/gcc/config/i386/avx10_2copyintrin.h new file mode 100644 index
00000000000..f1150c71dbf --- /dev/null +++ b/gcc/config/i386/avx10_2copyintrin.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of GCC. + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AVX10_2COPYINTRIN_H_INCLUDED +#define _AVX10_2COPYINTRIN_H_INCLUDED + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_move_epi32 (__m128i __A) +{ + return _mm_set_epi32 (0, 0, 0, ((__v4si) __A)[0]); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_move_epi16 (__m128i __A) +{ + return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, ((__v8hi) __A)[0]); +} + +#endif /* _AVX10_2COPYINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 470ae5444db..e28f9bb5eae 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -582,7 +582,7 @@ noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni, avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert, avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl, - vaes_avx512vl,noapx_nf,apx_cfcmov" + vaes_avx512vl,noapx_nf,apx_cfcmov,avx10_2" (const_string "base")) ;; The (bounding maximum) length of an instruction immediate. 
@@ -979,6 +979,7 @@ (symbol_ref "TARGET_APX_NDD && Pmode == DImode") (eq_attr "isa" "vaes_avx512vl") (symbol_ref "TARGET_VAES && TARGET_AVX512VL") + (eq_attr "isa" "avx10_2") (symbol_ref "TARGET_AVX10_2_256") (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 0d5af155c36..6b8035e6467 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -160,4 +160,6 @@ #include <avx10_2-512minmaxintrin.h> +#include <avx10_2copyintrin.h> + #endif /* _IMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 956cdba55d3..93aa6d46ae4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11505,19 +11505,20 @@ (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) (define_insn "sse_movss_<mode>" - [(set (match_operand:VI4F_128 0 "register_operand" "=x,v") + [(set (match_operand:VI4F_128 0 "register_operand" "=x,v,v") (vec_merge:VI4F_128 - (match_operand:VI4F_128 2 "register_operand" " x,v") - (match_operand:VI4F_128 1 "register_operand" " 0,v") + (match_operand:VI4F_128 2 "register_operand" " x,v,v") + (match_operand:VI4F_128 1 "reg_or_0_operand" " 0,v,C") (const_int 1)))] "TARGET_SSE" "@ movss\t{%2, %0|%0, %2} - vmovss\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + vmovss\t{%2, %1, %0|%0, %1, %2} + vmovd\t{%2, %0|%0, %2}" + [(set_attr "isa" "noavx,avx,avx10_2") (set_attr "type" "ssemov") - (set_attr "prefix" "orig,maybe_evex") - (set_attr "mode" "SF")]) + (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "mode" "SF,SF,SI")]) (define_insn "avx2_vec_dup<mode>" [(set (match_operand:VF1_128_256 0 "register_operand" "=v") @@ -11687,18 +11688,19 @@ ;; see comment above inline_secondary_memory_needed function in i386.cc (define_insn "vec_set<mode>_0" [(set (match_operand:VI4F_128 0 "nonimmediate_operand" - "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x ,m ,m ,m") + "=Yr,*x,v,v,v,v,x,x,v,Yr ,?x ,x ,m ,m ,m") (vec_merge:VI4F_128 
(vec_duplicate:VI4F_128 (match_operand:<ssescalarmode> 2 "general_operand" - " Yr,*x,v,m,r ,m,x,v,?jrjm,?jrjm,?rm,!x,?re,!*fF")) + " Yr,*x,v,v,m,r ,m,x,v,?jrjm,?jrjm,?rm,!x,?re,!*fF")) (match_operand:VI4F_128 1 "nonimm_or_0_operand" - " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0") + " C , C,C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0") (const_int 1)))] "TARGET_SSE" "@ insertps\t{$0xe, %2, %0|%0, %2, 0xe} insertps\t{$0xe, %2, %0|%0, %2, 0xe} + vmovd\t{%2, %0|%0, %2} vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe} %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2} %vmovd\t{%2, %0|%0, %2} @@ -11712,22 +11714,24 @@ # #" [(set (attr "isa") - (cond [(eq_attr "alternative" "0,1,8,9") + (cond [(eq_attr "alternative" "0,1,9,10") (const_string "sse4_noavx") - (eq_attr "alternative" "2,7,10") + (eq_attr "alternative" "2") + (const_string "avx10_2") + (eq_attr "alternative" "3,8,11") (const_string "avx") - (eq_attr "alternative" "3,4") + (eq_attr "alternative" "4,5") (const_string "sse2") - (eq_attr "alternative" "5,6") + (eq_attr "alternative" "6,7") (const_string "noavx") ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "0,1,2,8,9,10") + (cond [(eq_attr "alternative" "0,1,3,9,10,11") (const_string "sselog") - (eq_attr "alternative" "12") - (const_string "imov") (eq_attr "alternative" "13") + (const_string "imov") + (eq_attr "alternative" "14") (const_string "fmov") ] (const_string "ssemov"))) @@ -11736,45 +11740,46 @@ (const_string "gpr16") (const_string "*"))) (set (attr "prefix_extra") - (if_then_else (eq_attr "alternative" "8,9,10") + (if_then_else (eq_attr "alternative" "9,10,11") (const_string "1") (const_string "*"))) (set (attr "length_immediate") - (if_then_else (eq_attr "alternative" "8,9,10") + (if_then_else (eq_attr "alternative" "9,10,11") (const_string "1") (const_string "*"))) (set (attr "prefix") - (cond [(eq_attr "alternative" "0,1,5,6,8,9") + (cond [(eq_attr "alternative" "0,1,6,7,9,10") (const_string "orig") - (eq_attr "alternative" "2") + (eq_attr 
"alternative" "2,3") (const_string "maybe_evex") - (eq_attr "alternative" "3,4") + (eq_attr "alternative" "4,5") (const_string "maybe_vex") - (eq_attr "alternative" "7,10") + (eq_attr "alternative" "8,11") (const_string "vex") ] (const_string "*"))) - (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*") + (set_attr "mode" "SF,SF,SI,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*") (set (attr "preferred_for_speed") - (cond [(eq_attr "alternative" "4") + (cond [(eq_attr "alternative" "5") (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") ] (symbol_ref "true")))]) (define_insn "@vec_set<mode>_0" [(set (match_operand:V8_128 0 "register_operand" - "=v,v,v,x,x,Yr,*x,x,x,x,v,v") + "=v,v,v,v,x,x,Yr,*x,x,x,x,v,v") (vec_merge:V8_128 (vec_duplicate:V8_128 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" - " r,m,v,r,m,Yr,*x,r,m,x,r,m")) + " r,m,v,v,r,m,Yr,*x,r,m,x,r,m")) (match_operand:V8_128 1 "reg_or_0_operand" - " C,C,v,0,0,0 ,0 ,x,x,x,v,v") + " C,C,C,v,0,0,0 ,0 ,x,x,x,v,v") (const_int 1)))] "TARGET_SSE2" "@ vmovw\t{%k2, %0|%0, %k2} vmovw\t{%2, %0|%0, %2} + vmovw\t{%2, %0|%0, %2} vmovsh\t{%2, %1, %0|%0, %1, %2} pinsrw\t{$0, %k2, %0|%0, %k2, 0} pinsrw\t{$0, %2, %0|%0, %2, 0} @@ -11786,65 +11791,92 @@ vpinsrw\t{$0, %k2, %1, %0|%0, %1, %k2, 0} vpinsrw\t{$0, %2, %1, %0|%0, %1, %2, 0}" [(set (attr "isa") - (cond [(eq_attr "alternative" "0,1,2") + (cond [(eq_attr "alternative" "0,1,3") (const_string "avx512fp16") - (eq_attr "alternative" "3,4") + (eq_attr "alternative" "2") + (const_string "avx10_2") + (eq_attr "alternative" "4,5") (const_string "noavx") - (eq_attr "alternative" "5,6") + (eq_attr "alternative" "6,7") (const_string "sse4_noavx") - (eq_attr "alternative" "7,8,9") + (eq_attr "alternative" "8,9,10") (const_string "avx") - (eq_attr "alternative" "10,11") + (eq_attr "alternative" "11,12") (const_string "avx512bw") ] (const_string "*"))) (set (attr "type") - (if_then_else (eq_attr "alternative" "0,1,2,5,6,9") + (if_then_else (eq_attr "alternative" 
"0,1,2,3,6,7,10") (const_string "ssemov") (const_string "sselog"))) (set (attr "prefix_data16") - (if_then_else (eq_attr "alternative" "3,4") + (if_then_else (eq_attr "alternative" "4,5") (const_string "1") (const_string "*"))) (set (attr "prefix_extra") - (if_then_else (eq_attr "alternative" "5,6,9") + (if_then_else (eq_attr "alternative" "6,7,10") (const_string "1") (const_string "*"))) (set (attr "length_immediate") - (if_then_else (eq_attr "alternative" "0,1,2") + (if_then_else (eq_attr "alternative" "0,1,2,3") (const_string "*") (const_string "1"))) (set (attr "prefix") - (cond [(eq_attr "alternative" "0,1,2,10,11") + (cond [(eq_attr "alternative" "0,1,2,3,11,12") (const_string "evex") - (eq_attr "alternative" "7,8,9") + (eq_attr "alternative" "8,9,10") (const_string "vex") ] (const_string "orig"))) (set (attr "mode") - (if_then_else (eq_attr "alternative" "0,1,2") + (if_then_else (eq_attr "alternative" "0,1,2,3") (const_string "HF") (const_string "TI"))) (set (attr "enabled") (cond [(and (not (match_test "<MODE>mode == V8HFmode || <MODE>mode == V8BFmode")) - (eq_attr "alternative" "2")) + (eq_attr "alternative" "3")) (symbol_ref "false") ] (const_string "*")))]) +(define_insn_and_split "*vec_set<mode>_0_1" + [(set (match_operand:V8_128 0 "register_operand") + (vec_merge:V8_128 + (vec_duplicate:V8_128 + (vec_select:<ssescalarmode> + (match_operand:V8_128 2 "nonimmediate_operand") + (parallel [(const_int 0)]))) + (match_operand:V8_128 1 "reg_or_0_operand") + (const_int 1)))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V8_128 + (vec_duplicate:V8_128 (match_dup 2)) + (match_dup 1) + (const_int 1)))] +{ + if (register_operand (operands[2], <MODE>mode)) + operands[2] = force_reg (<MODE>mode, operands[2]); + operands[2] = gen_lowpart (<ssescalarmode>mode, operands[2]); +}) + ;; vmovw clears also the higer bits (define_insn "vec_set<mode>_0" - [(set (match_operand:VI2F_256_512 0 "register_operand" "=v,v") + [(set 
(match_operand:VI2F_256_512 0 "register_operand" "=v,v,v") (vec_merge:VI2F_256_512 (vec_duplicate:VI2F_256_512 - (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m")) + (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,v")) (match_operand:VI2F_256_512 1 "const0_operand") (const_int 1)))] "TARGET_AVX512FP16" "@ vmovw\t{%k2, %x0|%x0, %k2} + vmovw\t{%2, %x0|%x0, %2} vmovw\t{%2, %x0|%x0, %2}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "*,*,avx10_2") + (set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "HF")]) @@ -11889,16 +11921,20 @@ }) (define_insn "avx512fp16_mov<mode>" - [(set (match_operand:V8_128 0 "register_operand" "=v") + [(set (match_operand:V8_128 0 "register_operand" "=v,v") (vec_merge:V8_128 - (match_operand:V8_128 2 "register_operand" "v") - (match_operand:V8_128 1 "register_operand" "v") + (match_operand:V8_128 2 "register_operand" "v,v") + (match_operand:V8_128 1 "reg_or_0_operand" "v,C") (const_int 1)))] - "TARGET_AVX512FP16" - "vmovsh\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemov") + "TARGET_AVX512FP16 + || (TARGET_AVX10_2_256 && const0_operand (operands[1], <MODE>mode))" + "@ + vmovsh\t{%2, %1, %0|%0, %1, %2} + vmovw\t{%2, %0|%0, %2}" + [(set_attr "isa" "*,avx10_2") + (set_attr "type" "ssemov") (set_attr "prefix" "evex") - (set_attr "mode" "HF")]) + (set_attr "mode" "HF,HI")]) ;; A subset is vec_setv4sf. 
(define_insn "*vec_setv4sf_sse4_1" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c new file mode 100644 index 00000000000..275bbade106 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-final { scan-assembler-times "vmovd\t4\\(%esp\\), %xmm0" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vmovss\t4\\(%esp\\), %xmm0" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vmovd\t%xmm0, %xmm0" 3 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vmovd\t%edi, %xmm0" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vmovd\t%xmm0, %xmm0" 4 { target { ! ia32 } } } } */ + + +#include<immintrin.h> + +typedef int v4si __attribute__((vector_size(16))); +typedef float v4sf __attribute__((vector_size(16))); + +v4si +__attribute__((noipa, unused)) +f1 (int a) +{ + return __extension__(v4si){a, 0, 0, 0}; +} + +v4sf +__attribute__((noipa, unused)) +f2 (float a) +{ + return __extension__(v4sf){a, 0, 0, 0}; +} + +v4si +__attribute__((noipa, unused)) +f3 (v4si a) +{ + return __extension__(v4si){a[0], 0, 0, 0}; +} + +v4sf +__attribute__((noipa, unused)) +f4 (v4sf a) +{ + return __extension__(v4sf){a[0], 0, 0, 0}; +} + +__m128i +__attribute__((noipa, unused)) +f5 (__m128i a) +{ + return _mm_set_epi32 (0, 0, 0,((__v4si)a)[0]); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c new file mode 100644 index 00000000000..7d659300d81 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX10_SCALAR + +#include "avx10-helper.h" +#include "avx10_2-vmovd-1.c" + +static void +TEST (void) +{ + union128i_d u1, s1; + int e1[4] = {0}; + + s1.x = 
_mm_set_epi32(-12876, -12886, -12776, 3376590); + e1[0] = s1.a[0]; + + u1.x = _mm_set_epi32(-1, -1, -1, -1); + u1.x = (__m128i)f1((int)s1.a[0]); + if (check_union128i_d (u1, e1)) + abort (); + + u1.x = _mm_set_epi32(-1, -1, -1, -1); + u1.x = (__m128i)f2(((float*)s1.a)[0]); + if (check_union128i_d (u1, e1)) + abort (); + + u1.x = _mm_set_epi32(-1, -1, -1, -1); + u1.x = (__m128i)f3((v4si)s1.x); + if (check_union128i_d (u1, e1)) + abort (); + + u1.x = _mm_set_epi32(-1, -1, -1, -1); + u1.x = (__m128i)f4((v4sf)s1.x); + if (check_union128i_d (u1, e1)) + abort (); + + u1.x = _mm_set_epi32(-1, -1, -1, -1); + u1.x = (__m128i)f5((__m128i)s1.x); + if (check_union128i_d (u1, e1)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c new file mode 100644 index 00000000000..ec19a9a263a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-final { scan-assembler-times "vmovw\t4\\(%esp\\), %xmm0" 3 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vmovw\t8\\(%ebp\\), %xmm0" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vmovw\t%xmm0, %xmm0" 4 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vmovw\t%edi, %xmm0" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vmovw\t%xmm0, %xmm0" 7 { target { ! 
ia32 } } } } */ + +#include<immintrin.h> + +typedef _Float16 v8hf __attribute__((vector_size(16))); +typedef __bf16 v8bf __attribute__((vector_size(16))); +typedef short v8hi __attribute__((vector_size(16))); + +v8hf +__attribute__((noipa, unused)) +f1 (_Float16 a) +{ + return __extension__(v8hf){a, 0, 0, 0, 0, 0, 0, 0}; +} + +v8bf +__attribute__((noipa, unused)) +f2 (__bf16 a) +{ + return __extension__(v8bf){a, 0, 0, 0, 0, 0, 0, 0}; +} + +v8hi +__attribute__((noipa, unused)) +f3 (short a) +{ + return __extension__(v8hi){a, 0, 0, 0, 0, 0, 0, 0}; +} + +v8hf +__attribute__((noipa, unused)) +f4 (v8hf a) +{ + return __extension__(v8hf){a[0], 0, 0, 0, 0, 0, 0, 0}; +} + +v8bf +__attribute__((noipa, unused)) +f5 (v8bf a) +{ + return __extension__(v8bf){a[0], 0, 0, 0, 0, 0, 0, 0}; +} + +v8hi +__attribute__((noipa, unused)) +f6 (v8hi a) +{ + return __extension__(v8hi){a[0], 0, 0, 0, 0, 0, 0, 0}; +} + +__m128i +__attribute__((noipa, unused)) +f7 (__m128i a) +{ + return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, ((__v8hi)a)[0]); +} + +__m256h +__attribute__((noipa, unused)) +f8 (_Float16 a) +{ + return _mm256_set_ph (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, a); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c new file mode 100644 index 00000000000..d63739e6887 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX10_SCALAR + +#include "avx10-helper.h" +#include "avx10_2-vmovw-1.c" + +static void +TEST (void) +{ + union128i_w u1, s1; + union256i_w u2, s2; + short e1[8] = {0}; + short e2[16] = {0}; + + s1.x = _mm_set_epi16(-12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158); + e1[0] = s1.a[0]; + + u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1); + u1.x = (__m128i)f1(((_Float16*)s1.a)[0]); + if (check_union128i_w (u1, e1)) + abort (); + 
+ u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1); + u1.x = (__m128i)f2(((__bf16*)s1.a)[0]); + if (check_union128i_w (u1, e1)) + abort (); + + u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1); + u1.x = (__m128i)f3((short)s1.a[0]); + if (check_union128i_w (u1, e1)) + abort (); + + u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1); + u1.x = (__m128i)f4((v8hf)s1.x); + if (check_union128i_w (u1, e1)) + abort (); + + u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1); + u1.x = (__m128i)f5((v8bf)s1.x); + if (check_union128i_w (u1, e1)) + abort (); + + u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1); + u1.x = (__m128i)f6((v8hi)s1.x); + if (check_union128i_w (u1, e1)) + abort (); + + u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1); + u1.x = (__m128i)f7((__m128i)s1.x); + if (check_union128i_w (u1, e1)) + abort (); + + s2.x = _mm256_set_epi16(-12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158, + -12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158); + e2[0] = s2.a[0]; + u2.x = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + u2.x = (__m256i)f8(((_Float16*)s2.a)[0]); + if (check_union256i_w (u2, e2)) + abort (); +} -- 2.43.5